Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit a66979ab authored by Dave Chinner's avatar Dave Chinner Committed by Al Viro
Browse files

fs: move i_wb_list out from under inode_lock



Protect the inode writeback list with a new global lock
inode_wb_list_lock and use it to protect the list manipulations and
traversals. This lock replaces the inode_lock as the inodes on the
list can be validity checked while holding the inode->i_lock and
hence the inode_lock is no longer needed to protect the list.

Signed-off-by: default avatarDave Chinner <dchinner@redhat.com>
Signed-off-by: default avatarAl Viro <viro@zeniv.linux.org.uk>
parent 55fa6091
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -55,13 +55,13 @@ EXPORT_SYMBOL(I_BDEV);
static void bdev_inode_switch_bdi(struct inode *inode,
			struct backing_dev_info *dst)
{
	spin_lock(&inode_lock);
	spin_lock(&inode_wb_list_lock);
	spin_lock(&inode->i_lock);
	inode->i_data.backing_dev_info = dst;
	if (inode->i_state & I_DIRTY)
		list_move(&inode->i_wb_list, &dst->wb.b_dirty);
	spin_unlock(&inode->i_lock);
	spin_unlock(&inode_lock);
	spin_unlock(&inode_wb_list_lock);
}

static sector_t max_block(struct block_device *bdev)
+44 −32
Original line number Diff line number Diff line
@@ -175,6 +175,17 @@ void bdi_start_background_writeback(struct backing_dev_info *bdi)
	spin_unlock_bh(&bdi->wb_lock);
}

/*
 * Remove the inode from the writeback list it is on.
 */
void inode_wb_list_del(struct inode *inode)
{
	spin_lock(&inode_wb_list_lock);
	list_del_init(&inode->i_wb_list);
	spin_unlock(&inode_wb_list_lock);
}


/*
 * Redirty an inode: set its when-it-was dirtied timestamp and move it to the
 * furthest end of its superblock's dirty-inode list.
@@ -188,6 +199,7 @@ static void redirty_tail(struct inode *inode)
{
	struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;

	assert_spin_locked(&inode_wb_list_lock);
	if (!list_empty(&wb->b_dirty)) {
		struct inode *tail;

@@ -205,14 +217,17 @@ static void requeue_io(struct inode *inode)
{
	struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;

	assert_spin_locked(&inode_wb_list_lock);
	list_move(&inode->i_wb_list, &wb->b_more_io);
}

static void inode_sync_complete(struct inode *inode)
{
	/*
	 * Prevent speculative execution through spin_unlock(&inode_lock);
	 * Prevent speculative execution through
	 * spin_unlock(&inode_wb_list_lock);
	 */

	smp_mb();
	wake_up_bit(&inode->i_state, __I_SYNC);
}
@@ -286,6 +301,7 @@ static void move_expired_inodes(struct list_head *delaying_queue,
 */
static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this)
{
	assert_spin_locked(&inode_wb_list_lock);
	list_splice_init(&wb->b_more_io, &wb->b_io);
	move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this);
}
@@ -308,25 +324,23 @@ static void inode_wait_for_writeback(struct inode *inode)
	wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
	while (inode->i_state & I_SYNC) {
		spin_unlock(&inode->i_lock);
		spin_unlock(&inode_lock);
		spin_unlock(&inode_wb_list_lock);
		__wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE);
		spin_lock(&inode_lock);
		spin_lock(&inode_wb_list_lock);
		spin_lock(&inode->i_lock);
	}
}

/*
 * Write out an inode's dirty pages.  Called under inode_lock.  Either the
 * caller has ref on the inode (either via __iget or via syscall against an fd)
 * or the inode has I_WILL_FREE set (via generic_forget_inode)
 * Write out an inode's dirty pages.  Called under inode_wb_list_lock.  Either
 * the caller has an active reference on the inode or the inode has I_WILL_FREE
 * set.
 *
 * If `wait' is set, wait on the writeout.
 *
 * The whole writeout design is quite complex and fragile.  We want to avoid
 * starvation of particular inodes when others are being redirtied, prevent
 * livelocks, etc.
 *
 * Called under inode_lock.
 */
static int
writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
@@ -368,7 +382,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
	inode->i_state |= I_SYNC;
	inode->i_state &= ~I_DIRTY_PAGES;
	spin_unlock(&inode->i_lock);
	spin_unlock(&inode_lock);
	spin_unlock(&inode_wb_list_lock);

	ret = do_writepages(mapping, wbc);

@@ -388,12 +402,10 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
	 * due to delalloc, clear dirty metadata flags right before
	 * write_inode()
	 */
	spin_lock(&inode_lock);
	spin_lock(&inode->i_lock);
	dirty = inode->i_state & I_DIRTY;
	inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC);
	spin_unlock(&inode->i_lock);
	spin_unlock(&inode_lock);
	/* Don't write the inode if only I_DIRTY_PAGES was set */
	if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
		int err = write_inode(inode, wbc);
@@ -401,7 +413,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
			ret = err;
	}

	spin_lock(&inode_lock);
	spin_lock(&inode_wb_list_lock);
	spin_lock(&inode->i_lock);
	inode->i_state &= ~I_SYNC;
	if (!(inode->i_state & I_FREEING)) {
@@ -543,10 +555,10 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
			 */
			redirty_tail(inode);
		}
		spin_unlock(&inode_lock);
		spin_unlock(&inode_wb_list_lock);
		iput(inode);
		cond_resched();
		spin_lock(&inode_lock);
		spin_lock(&inode_wb_list_lock);
		if (wbc->nr_to_write <= 0) {
			wbc->more_io = 1;
			return 1;
@@ -565,7 +577,7 @@ void writeback_inodes_wb(struct bdi_writeback *wb,

	if (!wbc->wb_start)
		wbc->wb_start = jiffies; /* livelock avoidance */
	spin_lock(&inode_lock);
	spin_lock(&inode_wb_list_lock);
	if (!wbc->for_kupdate || list_empty(&wb->b_io))
		queue_io(wb, wbc->older_than_this);

@@ -583,7 +595,7 @@ void writeback_inodes_wb(struct bdi_writeback *wb,
		if (ret)
			break;
	}
	spin_unlock(&inode_lock);
	spin_unlock(&inode_wb_list_lock);
	/* Leave any unwritten inodes on b_io */
}

@@ -592,11 +604,11 @@ static void __writeback_inodes_sb(struct super_block *sb,
{
	WARN_ON(!rwsem_is_locked(&sb->s_umount));

	spin_lock(&inode_lock);
	spin_lock(&inode_wb_list_lock);
	if (!wbc->for_kupdate || list_empty(&wb->b_io))
		queue_io(wb, wbc->older_than_this);
	writeback_sb_inodes(sb, wb, wbc, true);
	spin_unlock(&inode_lock);
	spin_unlock(&inode_wb_list_lock);
}

/*
@@ -735,7 +747,7 @@ static long wb_writeback(struct bdi_writeback *wb,
		 * become available for writeback. Otherwise
		 * we'll just busyloop.
		 */
		spin_lock(&inode_lock);
		spin_lock(&inode_wb_list_lock);
		if (!list_empty(&wb->b_more_io))  {
			inode = wb_inode(wb->b_more_io.prev);
			trace_wbc_writeback_wait(&wbc, wb->bdi);
@@ -743,7 +755,7 @@ static long wb_writeback(struct bdi_writeback *wb,
			inode_wait_for_writeback(inode);
			spin_unlock(&inode->i_lock);
		}
		spin_unlock(&inode_lock);
		spin_unlock(&inode_wb_list_lock);
	}

	return wrote;
@@ -1009,7 +1021,6 @@ void __mark_inode_dirty(struct inode *inode, int flags)
{
	struct super_block *sb = inode->i_sb;
	struct backing_dev_info *bdi = NULL;
	bool wakeup_bdi = false;

	/*
	 * Don't do this for I_DIRTY_PAGES - that doesn't actually
@@ -1033,7 +1044,6 @@ void __mark_inode_dirty(struct inode *inode, int flags)
	if (unlikely(block_dump))
		block_dump___mark_inode_dirty(inode);

	spin_lock(&inode_lock);
	spin_lock(&inode->i_lock);
	if ((inode->i_state & flags) != flags) {
		const int was_dirty = inode->i_state & I_DIRTY;
@@ -1059,12 +1069,12 @@ void __mark_inode_dirty(struct inode *inode, int flags)
		if (inode->i_state & I_FREEING)
			goto out_unlock_inode;

		spin_unlock(&inode->i_lock);
		/*
		 * If the inode was already on b_dirty/b_io/b_more_io, don't
		 * reposition it (that would break b_dirty time-ordering).
		 */
		if (!was_dirty) {
			bool wakeup_bdi = false;
			bdi = inode_to_bdi(inode);

			if (bdi_cap_writeback_dirty(bdi)) {
@@ -1081,18 +1091,20 @@ void __mark_inode_dirty(struct inode *inode, int flags)
					wakeup_bdi = true;
			}

			spin_unlock(&inode->i_lock);
			spin_lock(&inode_wb_list_lock);
			inode->dirtied_when = jiffies;
			list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
			spin_unlock(&inode_wb_list_lock);

			if (wakeup_bdi)
				bdi_wakeup_thread_delayed(bdi);
			return;
		}
		goto out;
	}
out_unlock_inode:
	spin_unlock(&inode->i_lock);
out:
	spin_unlock(&inode_lock);

	if (wakeup_bdi)
		bdi_wakeup_thread_delayed(bdi);
}
EXPORT_SYMBOL(__mark_inode_dirty);

@@ -1296,9 +1308,9 @@ int write_inode_now(struct inode *inode, int sync)
		wbc.nr_to_write = 0;

	might_sleep();
	spin_lock(&inode_lock);
	spin_lock(&inode_wb_list_lock);
	ret = writeback_single_inode(inode, &wbc);
	spin_unlock(&inode_lock);
	spin_unlock(&inode_wb_list_lock);
	if (sync)
		inode_sync_wait(inode);
	return ret;
@@ -1320,9 +1332,9 @@ int sync_inode(struct inode *inode, struct writeback_control *wbc)
{
	int ret;

	spin_lock(&inode_lock);
	spin_lock(&inode_wb_list_lock);
	ret = writeback_single_inode(inode, wbc);
	spin_unlock(&inode_lock);
	spin_unlock(&inode_wb_list_lock);
	return ret;
}
EXPORT_SYMBOL(sync_inode);
+8 −4
Original line number Diff line number Diff line
@@ -26,6 +26,7 @@
#include <linux/posix_acl.h>
#include <linux/ima.h>
#include <linux/cred.h>
#include "internal.h"

/*
 * inode locking rules.
@@ -36,6 +37,8 @@
 *   inode_lru, inode->i_lru
 * inode_sb_list_lock protects:
 *   sb->s_inodes, inode->i_sb_list
 * inode_wb_list_lock protects:
 *   bdi->wb.b_{dirty,io,more_io}, inode->i_wb_list
 *
 * Lock ordering:
 * inode_lock
@@ -44,6 +47,9 @@
 * inode_sb_list_lock
 *   inode->i_lock
 *     inode_lru_lock
 *
 * inode_wb_list_lock
 *   inode->i_lock
 */

/*
@@ -105,6 +111,7 @@ static struct hlist_head *inode_hashtable __read_mostly;
DEFINE_SPINLOCK(inode_lock);

__cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock);
__cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_wb_list_lock);

/*
 * iprune_sem provides exclusion between the icache shrinking and the
@@ -483,10 +490,7 @@ static void evict(struct inode *inode)
	BUG_ON(!(inode->i_state & I_FREEING));
	BUG_ON(!list_empty(&inode->i_lru));

	spin_lock(&inode_lock);
	list_del_init(&inode->i_wb_list);
	spin_unlock(&inode_lock);

	inode_wb_list_del(inode);
	inode_sb_list_del(inode);

	if (op->evict_inode) {
+5 −0
Original line number Diff line number Diff line
@@ -127,6 +127,11 @@ extern long do_handle_open(int mountdirfd,
 */
extern spinlock_t inode_sb_list_lock;

/*
 * fs-writeback.c
 */
extern void inode_wb_list_del(struct inode *inode);

extern int get_nr_dirty_inodes(void);
extern void evict_inodes(struct super_block *);
extern int invalidate_inodes(struct super_block *, bool);
+1 −0
Original line number Diff line number Diff line
@@ -10,6 +10,7 @@
struct backing_dev_info;

extern spinlock_t inode_lock;
extern spinlock_t inode_wb_list_lock;

/*
 * fs/fs-writeback.c
Loading