Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 43ff2122 authored by Christoph Hellwig's avatar Christoph Hellwig Committed by Ben Myers
Browse files

xfs: on-stack delayed write buffer lists



Queue delwri buffers on a local on-stack list instead of a per-buftarg one,
and write back the buffers per-process instead of by waking up xfsbufd.

This is now easily doable given that we have very few places left that write
delwri buffers:

 - log recovery:
	Only done at mount time, and already forcing out the buffers
	synchronously using xfs_flush_buftarg

 - quotacheck:
	Same story.

 - dquot reclaim:
	Writes out dirty dquots on the LRU under memory pressure.  We might
	want to look into doing more of this via xfsaild, but it's already
	more optimal than the synchronous inode reclaim that writes each
	buffer synchronously.

 - xfsaild:
	This is the main beneficiary of the change.  By keeping a local list
	of buffers to write we reduce latency of writing out buffers, and
	more importably we can remove all the delwri list promotions which
	were hitting the buffer cache hard under sustained metadata loads.

The implementation is very straight forward - xfs_buf_delwri_queue now gets
a new list_head pointer that it adds the delwri buffers to, and all callers
need to eventually submit the list using xfs_buf_delwi_submit or
xfs_buf_delwi_submit_nowait.  Buffers that already are on a delwri list are
skipped in xfs_buf_delwri_queue, assuming they already are on another delwri
list.  The biggest change to pass down the buffer list was done to the AIL
pushing. Now that we operate on buffers the trylock, push and pushbuf log
item methods are merged into a single push routine, which tries to lock the
item, and if possible add the buffer that needs writeback to the buffer list.
This leads to much simpler code than the previous split but requires the
individual IOP_PUSH instances to unlock and reacquire the AIL around calls
to blocking routines.

Given that xfsailds now also handle writing out buffers, the conditions for
log forcing and the sleep times needed some small changes.  The most
important one is that we consider an AIL busy as long we still have buffers
to push, and the other one is that we do increment the pushed LSN for
buffers that are under flushing at this moment, but still count them towards
the stuck items for restart purposes.  Without this we could hammer on stuck
items without ever forcing the log and not make progress under heavy random
delete workloads on fast flash storage devices.

[ Dave Chinner:
	- rebase on previous patches.
	- improved comments for XBF_DELWRI_Q handling
	- fix XBF_ASYNC handling in queue submission (test 106 failure)
	- rename delwri submit function buffer list parameters for clarity
	- xfs_efd_item_push() should return XFS_ITEM_PINNED ]

Signed-off-by: default avatarChristoph Hellwig <hch@lst.de>
Reviewed-by: default avatarDave Chinner <dchinner@redhat.com>
Reviewed-by: default avatarMark Tinguely <tinguely@sgi.com>
Signed-off-by: default avatarBen Myers <bpm@sgi.com>
parent 960c60af
Loading
Loading
Loading
Loading
+131 −210
Original line number Diff line number Diff line
@@ -42,7 +42,6 @@
#include "xfs_trace.h"

static kmem_zone_t *xfs_buf_zone;
STATIC int xfsbufd(void *);

static struct workqueue_struct *xfslogd_workqueue;

@@ -144,8 +143,17 @@ void
xfs_buf_stale(
	struct xfs_buf	*bp)
{
	ASSERT(xfs_buf_islocked(bp));

	bp->b_flags |= XBF_STALE;
	xfs_buf_delwri_dequeue(bp);

	/*
	 * Clear the delwri status so that a delwri queue walker will not
	 * flush this buffer to disk now that it is stale. The delwri queue has
	 * a reference to the buffer, so this is safe to do.
	 */
	bp->b_flags &= ~_XBF_DELWRI_Q;

	atomic_set(&(bp)->b_lru_ref, 0);
	if (!list_empty(&bp->b_lru)) {
		struct xfs_buftarg *btp = bp->b_target;
@@ -592,10 +600,10 @@ _xfs_buf_read(
{
	int			status;

	ASSERT(!(flags & (XBF_DELWRI|XBF_WRITE)));
	ASSERT(!(flags & XBF_WRITE));
	ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL);

	bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_DELWRI | XBF_READ_AHEAD);
	bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD);
	bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD);

	status = xfs_buf_iorequest(bp);
@@ -855,7 +863,7 @@ xfs_buf_rele(
			spin_unlock(&pag->pag_buf_lock);
		} else {
			xfs_buf_lru_del(bp);
			ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)));
			ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
			rb_erase(&bp->b_rbnode, &pag->pag_buf_tree);
			spin_unlock(&pag->pag_buf_lock);
			xfs_perag_put(pag);
@@ -915,13 +923,6 @@ xfs_buf_lock(
	trace_xfs_buf_lock_done(bp, _RET_IP_);
}

/*
 *	Releases the lock on the buffer object.
 *	If the buffer is marked delwri but is not queued, do so before we
 *	unlock the buffer as we need to set flags correctly.  We also need to
 *	take a reference for the delwri queue because the unlocker is going to
 *	drop their's and they don't know we just queued it.
 */
void
xfs_buf_unlock(
	struct xfs_buf		*bp)
@@ -1019,10 +1020,11 @@ xfs_bwrite(
{
	int			error;

	ASSERT(xfs_buf_islocked(bp));

	bp->b_flags |= XBF_WRITE;
	bp->b_flags &= ~(XBF_ASYNC | XBF_READ);
	bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q);

	xfs_buf_delwri_dequeue(bp);
	xfs_bdstrat_cb(bp);

	error = xfs_buf_iowait(bp);
@@ -1254,7 +1256,7 @@ xfs_buf_iorequest(
{
	trace_xfs_buf_iorequest(bp, _RET_IP_);

	ASSERT(!(bp->b_flags & XBF_DELWRI));
	ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));

	if (bp->b_flags & XBF_WRITE)
		xfs_buf_wait_unpin(bp);
@@ -1435,11 +1437,9 @@ xfs_free_buftarg(
{
	unregister_shrinker(&btp->bt_shrinker);

	xfs_flush_buftarg(btp, 1);
	if (mp->m_flags & XFS_MOUNT_BARRIER)
		xfs_blkdev_issue_flush(btp);

	kthread_stop(btp->bt_task);
	kmem_free(btp);
}

@@ -1491,20 +1491,6 @@ xfs_setsize_buftarg(
	return xfs_setsize_buftarg_flags(btp, blocksize, sectorsize, 1);
}

STATIC int
xfs_alloc_delwri_queue(
	xfs_buftarg_t		*btp,
	const char		*fsname)
{
	INIT_LIST_HEAD(&btp->bt_delwri_queue);
	spin_lock_init(&btp->bt_delwri_lock);
	btp->bt_flags = 0;
	btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd/%s", fsname);
	if (IS_ERR(btp->bt_task))
		return PTR_ERR(btp->bt_task);
	return 0;
}

xfs_buftarg_t *
xfs_alloc_buftarg(
	struct xfs_mount	*mp,
@@ -1527,8 +1513,6 @@ xfs_alloc_buftarg(
	spin_lock_init(&btp->bt_lru_lock);
	if (xfs_setsize_buftarg_early(btp, bdev))
		goto error;
	if (xfs_alloc_delwri_queue(btp, fsname))
		goto error;
	btp->bt_shrinker.shrink = xfs_buftarg_shrink;
	btp->bt_shrinker.seeks = DEFAULT_SEEKS;
	register_shrinker(&btp->bt_shrinker);
@@ -1539,125 +1523,52 @@ xfs_alloc_buftarg(
	return NULL;
}


/*
 *	Delayed write buffer handling
 * Add a buffer to the delayed write list.
 *
 * This queues a buffer for writeout if it hasn't already been.  Note that
 * neither this routine nor the buffer list submission functions perform
 * any internal synchronization.  It is expected that the lists are thread-local
 * to the callers.
 *
 * Returns true if we queued up the buffer, or false if it already had
 * been on the buffer list.
 */
void
bool
xfs_buf_delwri_queue(
	xfs_buf_t		*bp)
	struct xfs_buf		*bp,
	struct list_head	*list)
{
	struct xfs_buftarg	*btp = bp->b_target;

	trace_xfs_buf_delwri_queue(bp, _RET_IP_);

	ASSERT(xfs_buf_islocked(bp));
	ASSERT(!(bp->b_flags & XBF_READ));

	spin_lock(&btp->bt_delwri_lock);
	if (!list_empty(&bp->b_list)) {
		/* if already in the queue, move it to the tail */
		ASSERT(bp->b_flags & _XBF_DELWRI_Q);
		list_move_tail(&bp->b_list, &btp->bt_delwri_queue);
	} else {
		/* start xfsbufd as it is about to have something to do */
		if (list_empty(&btp->bt_delwri_queue))
			wake_up_process(bp->b_target->bt_task);

		atomic_inc(&bp->b_hold);
		bp->b_flags |= XBF_DELWRI | _XBF_DELWRI_Q | XBF_ASYNC;
		list_add_tail(&bp->b_list, &btp->bt_delwri_queue);
	}
	bp->b_queuetime = jiffies;
	spin_unlock(&btp->bt_delwri_lock);
}

void
xfs_buf_delwri_dequeue(
	xfs_buf_t		*bp)
{
	int			dequeued = 0;

	spin_lock(&bp->b_target->bt_delwri_lock);
	if ((bp->b_flags & XBF_DELWRI) && !list_empty(&bp->b_list)) {
		ASSERT(bp->b_flags & _XBF_DELWRI_Q);
		list_del_init(&bp->b_list);
		dequeued = 1;
	}
	bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q);
	spin_unlock(&bp->b_target->bt_delwri_lock);

	if (dequeued)
		xfs_buf_rele(bp);

	trace_xfs_buf_delwri_dequeue(bp, _RET_IP_);
}

	/*
 * If a delwri buffer needs to be pushed before it has aged out, then promote
 * it to the head of the delwri queue so that it will be flushed on the next
 * xfsbufd run. We do this by resetting the queuetime of the buffer to be older
 * than the age currently needed to flush the buffer. Hence the next time the
 * xfsbufd sees it is guaranteed to be considered old enough to flush.
	 * If the buffer is already marked delwri it already is queued up
	 * by someone else for imediate writeout.  Just ignore it in that
	 * case.
	 */
void
xfs_buf_delwri_promote(
	struct xfs_buf	*bp)
{
	struct xfs_buftarg *btp = bp->b_target;
	long		age = xfs_buf_age_centisecs * msecs_to_jiffies(10) + 1;

	ASSERT(bp->b_flags & XBF_DELWRI);
	ASSERT(bp->b_flags & _XBF_DELWRI_Q);

	/*
	 * Check the buffer age before locking the delayed write queue as we
	 * don't need to promote buffers that are already past the flush age.
	 */
	if (bp->b_queuetime < jiffies - age)
		return;
	bp->b_queuetime = jiffies - age;
	spin_lock(&btp->bt_delwri_lock);
	list_move(&bp->b_list, &btp->bt_delwri_queue);
	spin_unlock(&btp->bt_delwri_lock);
	if (bp->b_flags & _XBF_DELWRI_Q) {
		trace_xfs_buf_delwri_queued(bp, _RET_IP_);
		return false;
	}

	trace_xfs_buf_delwri_queue(bp, _RET_IP_);

	/*
 * Move as many buffers as specified to the supplied list
 * idicating if we skipped any buffers to prevent deadlocks.
	 * If a buffer gets written out synchronously or marked stale while it
	 * is on a delwri list we lazily remove it. To do this, the other party
	 * clears the  _XBF_DELWRI_Q flag but otherwise leaves the buffer alone.
	 * It remains referenced and on the list.  In a rare corner case it
	 * might get readded to a delwri list after the synchronous writeout, in
	 * which case we need just need to re-add the flag here.
	 */
STATIC int
xfs_buf_delwri_split(
	xfs_buftarg_t	*target,
	struct list_head *list,
	unsigned long	age)
{
	xfs_buf_t	*bp, *n;
	int		skipped = 0;
	int		force;

	force = test_and_clear_bit(XBT_FORCE_FLUSH, &target->bt_flags);
	INIT_LIST_HEAD(list);
	spin_lock(&target->bt_delwri_lock);
	list_for_each_entry_safe(bp, n, &target->bt_delwri_queue, b_list) {
		ASSERT(bp->b_flags & XBF_DELWRI);

		if (!xfs_buf_ispinned(bp) && xfs_buf_trylock(bp)) {
			if (!force &&
			    time_before(jiffies, bp->b_queuetime + age)) {
				xfs_buf_unlock(bp);
				break;
			}

			bp->b_flags &= ~(XBF_DELWRI | _XBF_DELWRI_Q);
			bp->b_flags |= XBF_WRITE;
			list_move_tail(&bp->b_list, list);
			trace_xfs_buf_delwri_split(bp, _RET_IP_);
		} else
			skipped++;
	bp->b_flags |= _XBF_DELWRI_Q;
	if (list_empty(&bp->b_list)) {
		atomic_inc(&bp->b_hold);
		list_add_tail(&bp->b_list, list);
	}

	spin_unlock(&target->bt_delwri_lock);
	return skipped;
	return true;
}

/*
@@ -1683,99 +1594,109 @@ xfs_buf_cmp(
	return 0;
}

STATIC int
xfsbufd(
	void		*data)
static int
__xfs_buf_delwri_submit(
	struct list_head	*buffer_list,
	struct list_head	*io_list,
	bool			wait)
{
	xfs_buftarg_t   *target = (xfs_buftarg_t *)data;

	current->flags |= PF_MEMALLOC;

	set_freezable();

	do {
		long	age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
		long	tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10);
		struct list_head tmp;
	struct blk_plug		plug;
	struct xfs_buf		*bp, *n;
	int			pinned = 0;

		if (unlikely(freezing(current)))
			try_to_freeze();
	list_for_each_entry_safe(bp, n, buffer_list, b_list) {
		if (!wait) {
			if (xfs_buf_ispinned(bp)) {
				pinned++;
				continue;
			}
			if (!xfs_buf_trylock(bp))
				continue;
		} else {
			xfs_buf_lock(bp);
		}

		/* sleep for a long time if there is nothing to do. */
		if (list_empty(&target->bt_delwri_queue))
			tout = MAX_SCHEDULE_TIMEOUT;
		schedule_timeout_interruptible(tout);
		/*
		 * Someone else might have written the buffer synchronously or
		 * marked it stale in the meantime.  In that case only the
		 * _XBF_DELWRI_Q flag got cleared, and we have to drop the
		 * reference and remove it from the list here.
		 */
		if (!(bp->b_flags & _XBF_DELWRI_Q)) {
			list_del_init(&bp->b_list);
			xfs_buf_relse(bp);
			continue;
		}

		xfs_buf_delwri_split(target, &tmp, age);
		list_sort(NULL, &tmp, xfs_buf_cmp);
		list_move_tail(&bp->b_list, io_list);
		trace_xfs_buf_delwri_split(bp, _RET_IP_);
	}

	list_sort(NULL, io_list, xfs_buf_cmp);

	blk_start_plug(&plug);
		while (!list_empty(&tmp)) {
			struct xfs_buf *bp;
			bp = list_first_entry(&tmp, struct xfs_buf, b_list);
	list_for_each_entry_safe(bp, n, io_list, b_list) {
		bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC);
		bp->b_flags |= XBF_WRITE;

		if (!wait) {
			bp->b_flags |= XBF_ASYNC;
			list_del_init(&bp->b_list);
		}
		xfs_bdstrat_cb(bp);
	}
	blk_finish_plug(&plug);
	} while (!kthread_should_stop());

	return 0;
	return pinned;
}

/*
 *	Go through all incore buffers, and release buffers if they belong to
 *	the given device. This is used in filesystem error handling to
 *	preserve the consistency of its metadata.
 * Write out a buffer list asynchronously.
 *
 * This will take the @buffer_list, write all non-locked and non-pinned buffers
 * out and not wait for I/O completion on any of the buffers.  This interface
 * is only safely useable for callers that can track I/O completion by higher
 * level means, e.g. AIL pushing as the @buffer_list is consumed in this
 * function.
 */
int
xfs_flush_buftarg(
	xfs_buftarg_t	*target,
	int		wait)
xfs_buf_delwri_submit_nowait(
	struct list_head	*buffer_list)
{
	xfs_buf_t	*bp;
	int		pincount = 0;
	LIST_HEAD(tmp_list);
	LIST_HEAD(wait_list);
	struct blk_plug plug;

	flush_workqueue(xfslogd_workqueue);

	set_bit(XBT_FORCE_FLUSH, &target->bt_flags);
	pincount = xfs_buf_delwri_split(target, &tmp_list, 0);
	LIST_HEAD		(io_list);
	return __xfs_buf_delwri_submit(buffer_list, &io_list, false);
}

/*
	 * Dropped the delayed write list lock, now walk the temporary list.
	 * All I/O is issued async and then if we need to wait for completion
	 * we do that after issuing all the IO.
 * Write out a buffer list synchronously.
 *
 * This will take the @buffer_list, write all buffers out and wait for I/O
 * completion on all of the buffers. @buffer_list is consumed by the function,
 * so callers must have some other way of tracking buffers if they require such
 * functionality.
 */
	list_sort(NULL, &tmp_list, xfs_buf_cmp);
int
xfs_buf_delwri_submit(
	struct list_head	*buffer_list)
{
	LIST_HEAD		(io_list);
	int			error = 0, error2;
	struct xfs_buf		*bp;

	blk_start_plug(&plug);
	while (!list_empty(&tmp_list)) {
		bp = list_first_entry(&tmp_list, struct xfs_buf, b_list);
		ASSERT(target == bp->b_target);
		list_del_init(&bp->b_list);
		if (wait) {
			bp->b_flags &= ~XBF_ASYNC;
			list_add(&bp->b_list, &wait_list);
		}
		xfs_bdstrat_cb(bp);
	}
	blk_finish_plug(&plug);
	__xfs_buf_delwri_submit(buffer_list, &io_list, true);

	if (wait) {
	/* Wait for IO to complete. */
		while (!list_empty(&wait_list)) {
			bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
	while (!list_empty(&io_list)) {
		bp = list_first_entry(&io_list, struct xfs_buf, b_list);

		list_del_init(&bp->b_list);
			xfs_buf_iowait(bp);
		error2 = xfs_buf_iowait(bp);
		xfs_buf_relse(bp);
		}
		if (!error)
			error = error2;
	}

	return pincount;
	return error;
}

int __init
+6 −22
Original line number Diff line number Diff line
@@ -49,8 +49,7 @@ typedef enum {
#define XBF_MAPPED	(1 << 3) /* buffer mapped (b_addr valid) */
#define XBF_ASYNC	(1 << 4) /* initiator will not wait for completion */
#define XBF_DONE	(1 << 5) /* all pages in the buffer uptodate */
#define XBF_DELWRI	(1 << 6) /* buffer has dirty pages */
#define XBF_STALE	(1 << 7) /* buffer has been staled, do not find it */
#define XBF_STALE	(1 << 6) /* buffer has been staled, do not find it */

/* I/O hints for the BIO layer */
#define XBF_SYNCIO	(1 << 10)/* treat this buffer as synchronous I/O */
@@ -65,7 +64,7 @@ typedef enum {
/* flags used only internally */
#define _XBF_PAGES	(1 << 20)/* backed by refcounted pages */
#define _XBF_KMEM	(1 << 21)/* backed by heap memory */
#define _XBF_DELWRI_Q	(1 << 22)/* buffer on delwri queue */
#define _XBF_DELWRI_Q	(1 << 22)/* buffer on a delwri queue */

typedef unsigned int xfs_buf_flags_t;

@@ -76,7 +75,6 @@ typedef unsigned int xfs_buf_flags_t;
	{ XBF_MAPPED,		"MAPPED" }, \
	{ XBF_ASYNC,		"ASYNC" }, \
	{ XBF_DONE,		"DONE" }, \
	{ XBF_DELWRI,		"DELWRI" }, \
	{ XBF_STALE,		"STALE" }, \
	{ XBF_SYNCIO,		"SYNCIO" }, \
	{ XBF_FUA,		"FUA" }, \
@@ -88,10 +86,6 @@ typedef unsigned int xfs_buf_flags_t;
	{ _XBF_KMEM,		"KMEM" }, \
	{ _XBF_DELWRI_Q,	"DELWRI_Q" }

typedef enum {
	XBT_FORCE_FLUSH = 0,
} xfs_buftarg_flags_t;

typedef struct xfs_buftarg {
	dev_t			bt_dev;
	struct block_device	*bt_bdev;
@@ -101,12 +95,6 @@ typedef struct xfs_buftarg {
	unsigned int		bt_sshift;
	size_t			bt_smask;

	/* per device delwri queue */
	struct task_struct	*bt_task;
	struct list_head	bt_delwri_queue;
	spinlock_t		bt_delwri_lock;
	unsigned long		bt_flags;

	/* LRU control structures */
	struct shrinker		bt_shrinker;
	struct list_head	bt_lru;
@@ -150,7 +138,6 @@ typedef struct xfs_buf {
	struct xfs_trans	*b_transp;
	struct page		**b_pages;	/* array of page pointers */
	struct page		*b_page_array[XB_PAGES]; /* inline pages */
	unsigned long		b_queuetime;	/* time buffer was queued */
	atomic_t		b_pin_count;	/* pin count */
	atomic_t		b_io_remaining;	/* #outstanding I/O requests */
	unsigned int		b_page_count;	/* size of page array */
@@ -220,24 +207,22 @@ static inline int xfs_buf_geterror(xfs_buf_t *bp)
extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t);

/* Delayed Write Buffer Routines */
extern void xfs_buf_delwri_queue(struct xfs_buf *);
extern void xfs_buf_delwri_dequeue(struct xfs_buf *);
extern void xfs_buf_delwri_promote(struct xfs_buf *);
extern bool xfs_buf_delwri_queue(struct xfs_buf *, struct list_head *);
extern int xfs_buf_delwri_submit(struct list_head *);
extern int xfs_buf_delwri_submit_nowait(struct list_head *);

/* Buffer Daemon Setup Routines */
extern int xfs_buf_init(void);
extern void xfs_buf_terminate(void);

#define XFS_BUF_ZEROFLAGS(bp) \
	((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI| \
	((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC| \
			    XBF_SYNCIO|XBF_FUA|XBF_FLUSH))

void xfs_buf_stale(struct xfs_buf *bp);
#define XFS_BUF_UNSTALE(bp)	((bp)->b_flags &= ~XBF_STALE)
#define XFS_BUF_ISSTALE(bp)	((bp)->b_flags & XBF_STALE)

#define XFS_BUF_ISDELAYWRITE(bp)	((bp)->b_flags & XBF_DELWRI)

#define XFS_BUF_DONE(bp)	((bp)->b_flags |= XBF_DONE)
#define XFS_BUF_UNDONE(bp)	((bp)->b_flags &= ~XBF_DONE)
#define XFS_BUF_ISDONE(bp)	((bp)->b_flags & XBF_DONE)
@@ -287,7 +272,6 @@ extern xfs_buftarg_t *xfs_alloc_buftarg(struct xfs_mount *,
extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *);
extern void xfs_wait_buftarg(xfs_buftarg_t *);
extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
extern int xfs_flush_buftarg(xfs_buftarg_t *, int);

#define xfs_getsize_buftarg(buftarg)	block_size((buftarg)->bt_bdev)
#define xfs_readonly_buftarg(buftarg)	bdev_read_only((buftarg)->bt_bdev)
+26 −70
Original line number Diff line number Diff line
@@ -418,7 +418,6 @@ xfs_buf_item_unpin(
	if (freed && stale) {
		ASSERT(bip->bli_flags & XFS_BLI_STALE);
		ASSERT(xfs_buf_islocked(bp));
		ASSERT(!(XFS_BUF_ISDELAYWRITE(bp)));
		ASSERT(XFS_BUF_ISSTALE(bp));
		ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL);

@@ -469,34 +468,28 @@ xfs_buf_item_unpin(
	}
}

/*
 * This is called to attempt to lock the buffer associated with this
 * buf log item.  Don't sleep on the buffer lock.  If we can't get
 * the lock right away, return 0.  If we can get the lock, take a
 * reference to the buffer. If this is a delayed write buffer that
 * needs AIL help to be written back, invoke the pushbuf routine
 * rather than the normal success path.
 */
STATIC uint
xfs_buf_item_trylock(
	struct xfs_log_item	*lip)
xfs_buf_item_push(
	struct xfs_log_item	*lip,
	struct list_head	*buffer_list)
{
	struct xfs_buf_log_item	*bip = BUF_ITEM(lip);
	struct xfs_buf		*bp = bip->bli_buf;
	uint			rval = XFS_ITEM_SUCCESS;

	if (xfs_buf_ispinned(bp))
		return XFS_ITEM_PINNED;
	if (!xfs_buf_trylock(bp))
		return XFS_ITEM_LOCKED;

	/* take a reference to the buffer.  */
	xfs_buf_hold(bp);

	ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
	trace_xfs_buf_item_trylock(bip);
	if (XFS_BUF_ISDELAYWRITE(bp))
		return XFS_ITEM_PUSHBUF;
	return XFS_ITEM_SUCCESS;

	trace_xfs_buf_item_push(bip);

	if (!xfs_buf_delwri_queue(bp, buffer_list))
		rval = XFS_ITEM_FLUSHING;
	xfs_buf_unlock(bp);
	return rval;
}

/*
@@ -609,48 +602,6 @@ xfs_buf_item_committed(
	return lsn;
}

/*
 * The buffer is locked, but is not a delayed write buffer.
 */
STATIC void
xfs_buf_item_push(
	struct xfs_log_item	*lip)
{
	struct xfs_buf_log_item	*bip = BUF_ITEM(lip);
	struct xfs_buf		*bp = bip->bli_buf;

	ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
	ASSERT(!XFS_BUF_ISDELAYWRITE(bp));

	trace_xfs_buf_item_push(bip);

	xfs_buf_delwri_queue(bp);
	xfs_buf_relse(bp);
}

/*
 * The buffer is locked and is a delayed write buffer. Promote the buffer
 * in the delayed write queue as the caller knows that they must invoke
 * the xfsbufd to get this buffer written. We have to unlock the buffer
 * to allow the xfsbufd to write it, too.
 */
STATIC bool
xfs_buf_item_pushbuf(
	struct xfs_log_item	*lip)
{
	struct xfs_buf_log_item	*bip = BUF_ITEM(lip);
	struct xfs_buf		*bp = bip->bli_buf;

	ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
	ASSERT(XFS_BUF_ISDELAYWRITE(bp));

	trace_xfs_buf_item_pushbuf(bip);

	xfs_buf_delwri_promote(bp);
	xfs_buf_relse(bp);
	return true;
}

STATIC void
xfs_buf_item_committing(
	struct xfs_log_item	*lip,
@@ -666,11 +617,9 @@ static const struct xfs_item_ops xfs_buf_item_ops = {
	.iop_format	= xfs_buf_item_format,
	.iop_pin	= xfs_buf_item_pin,
	.iop_unpin	= xfs_buf_item_unpin,
	.iop_trylock	= xfs_buf_item_trylock,
	.iop_unlock	= xfs_buf_item_unlock,
	.iop_committed	= xfs_buf_item_committed,
	.iop_push	= xfs_buf_item_push,
	.iop_pushbuf	= xfs_buf_item_pushbuf,
	.iop_committing = xfs_buf_item_committing
};

@@ -989,20 +938,27 @@ xfs_buf_iodone_callbacks(
	 * If the write was asynchronous then no one will be looking for the
	 * error.  Clear the error state and write the buffer out again.
	 *
	 * During sync or umount we'll write all pending buffers again
	 * synchronous, which will catch these errors if they keep hanging
	 * around.
	 * XXX: This helps against transient write errors, but we need to find
	 * a way to shut the filesystem down if the writes keep failing.
	 *
	 * In practice we'll shut the filesystem down soon as non-transient
	 * erorrs tend to affect the whole device and a failing log write
	 * will make us give up.  But we really ought to do better here.
	 */
	if (XFS_BUF_ISASYNC(bp)) {
		ASSERT(bp->b_iodone != NULL);

		trace_xfs_buf_item_iodone_async(bp, _RET_IP_);

		xfs_buf_ioerror(bp, 0); /* errno of 0 unsets the flag */

		if (!XFS_BUF_ISSTALE(bp)) {
			xfs_buf_delwri_queue(bp);
			XFS_BUF_DONE(bp);
		}
		ASSERT(bp->b_iodone != NULL);
		trace_xfs_buf_item_iodone_async(bp, _RET_IP_);
			bp->b_flags |= XBF_WRITE | XBF_ASYNC | XBF_DONE;
			xfs_bdstrat_cb(bp);
		} else {
			xfs_buf_relse(bp);
		}

		return;
	}

+0 −33
Original line number Diff line number Diff line
@@ -1005,39 +1005,6 @@ xfs_dqlock2(
	}
}

/*
 * Give the buffer a little push if it is incore and
 * wait on the flush lock.
 */
void
xfs_dqflock_pushbuf_wait(
	xfs_dquot_t	*dqp)
{
	xfs_mount_t	*mp = dqp->q_mount;
	xfs_buf_t	*bp;

	/*
	 * Check to see if the dquot has been flushed delayed
	 * write.  If so, grab its buffer and send it
	 * out immediately.  We'll be able to acquire
	 * the flush lock when the I/O completes.
	 */
	bp = xfs_incore(mp->m_ddev_targp, dqp->q_blkno,
			mp->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK);
	if (!bp)
		goto out_lock;

	if (XFS_BUF_ISDELAYWRITE(bp)) {
		if (xfs_buf_ispinned(bp))
			xfs_log_force(mp, 0);
		xfs_buf_delwri_promote(bp);
		wake_up_process(bp->b_target->bt_task);
	}
	xfs_buf_relse(bp);
out_lock:
	xfs_dqflock(dqp);
}

int __init
xfs_qm_init(void)
{
+0 −1
Original line number Diff line number Diff line
@@ -152,7 +152,6 @@ extern int xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *,
extern void		xfs_qm_dqput(xfs_dquot_t *);

extern void		xfs_dqlock2(struct xfs_dquot *, struct xfs_dquot *);
extern void		xfs_dqflock_pushbuf_wait(struct xfs_dquot *dqp);

static inline struct xfs_dquot *xfs_qm_dqhold(struct xfs_dquot *dqp)
{
Loading