Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 061f98e9 authored by Al Viro's avatar Al Viro
Browse files

Merge branch 'superblock-scaling' of...

Merge branch 'superblock-scaling' of git://git.kernel.org/pub/scm/linux/kernel/git/josef/btrfs-next into for-next

Conflicts:
	include/linux/fs.h
parents b5f5914c ac05fbb4
Loading
Loading
Loading
Loading
+6 −6
Original line number Diff line number Diff line
@@ -1769,7 +1769,7 @@ void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
{
	struct inode *inode, *old_inode = NULL;

	spin_lock(&inode_sb_list_lock);
	spin_lock(&blockdev_superblock->s_inode_list_lock);
	list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list) {
		struct address_space *mapping = inode->i_mapping;

@@ -1781,13 +1781,13 @@ void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
		}
		__iget(inode);
		spin_unlock(&inode->i_lock);
		spin_unlock(&inode_sb_list_lock);
		spin_unlock(&blockdev_superblock->s_inode_list_lock);
		/*
		 * We hold a reference to 'inode' so it couldn't have been
		 * removed from s_inodes list while we dropped the
		 * inode_sb_list_lock.  We cannot iput the inode now as we can
		 * s_inode_list_lock.  We cannot iput the inode now as we can
		 * be holding the last reference and we cannot iput it under
		 * inode_sb_list_lock. So we keep the reference and iput it
		 * s_inode_list_lock. So we keep the reference and iput it
		 * later.
		 */
		iput(old_inode);
@@ -1795,8 +1795,8 @@ void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)

		func(I_BDEV(inode), arg);

		spin_lock(&inode_sb_list_lock);
		spin_lock(&blockdev_superblock->s_inode_list_lock);
	}
	spin_unlock(&inode_sb_list_lock);
	spin_unlock(&blockdev_superblock->s_inode_list_lock);
	iput(old_inode);
}
+6 −4
Original line number Diff line number Diff line
@@ -17,7 +17,7 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
{
	struct inode *inode, *toput_inode = NULL;

	spin_lock(&inode_sb_list_lock);
	spin_lock(&sb->s_inode_list_lock);
	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
		spin_lock(&inode->i_lock);
		if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
@@ -27,13 +27,15 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
		}
		__iget(inode);
		spin_unlock(&inode->i_lock);
		spin_unlock(&inode_sb_list_lock);
		spin_unlock(&sb->s_inode_list_lock);

		invalidate_mapping_pages(inode->i_mapping, 0, -1);
		iput(toput_inode);
		toput_inode = inode;
		spin_lock(&inode_sb_list_lock);

		spin_lock(&sb->s_inode_list_lock);
	}
	spin_unlock(&inode_sb_list_lock);
	spin_unlock(&sb->s_inode_list_lock);
	iput(toput_inode);
}

+43 −29
Original line number Diff line number Diff line
@@ -88,7 +88,7 @@ unsigned int dirtytime_expire_interval = 12 * 60 * 60;

static inline struct inode *wb_inode(struct list_head *head)
{
	return list_entry(head, struct inode, i_wb_list);
	return list_entry(head, struct inode, i_io_list);
}

/*
@@ -125,22 +125,22 @@ static void wb_io_lists_depopulated(struct bdi_writeback *wb)
}

/**
 * inode_wb_list_move_locked - move an inode onto a bdi_writeback IO list
 * inode_io_list_move_locked - move an inode onto a bdi_writeback IO list
 * @inode: inode to be moved
 * @wb: target bdi_writeback
 * @head: one of @wb->b_{dirty|io|more_io|dirty_time}
 *
 * Move @inode->i_wb_list to @list of @wb and set %WB_has_dirty_io.
 * Move @inode->i_io_list to @head of @wb and set %WB_has_dirty_io.
 * Returns %true if @inode is the first occupant of the !dirty_time IO
 * lists; otherwise, %false.
 */
static bool inode_wb_list_move_locked(struct inode *inode,
static bool inode_io_list_move_locked(struct inode *inode,
				      struct bdi_writeback *wb,
				      struct list_head *head)
{
	assert_spin_locked(&wb->list_lock);

	list_move(&inode->i_wb_list, head);
	list_move(&inode->i_io_list, head);

	/* dirty_time doesn't count as dirty_io until expiration */
	if (head != &wb->b_dirty_time)
@@ -151,19 +151,19 @@ static bool inode_wb_list_move_locked(struct inode *inode,
}

/**
 * inode_wb_list_del_locked - remove an inode from its bdi_writeback IO list
 * inode_io_list_del_locked - remove an inode from its bdi_writeback IO list
 * @inode: inode to be removed
 * @wb: bdi_writeback @inode is being removed from
 *
 * Remove @inode which may be on one of @wb->b_{dirty|io|more_io} lists and
 * clear %WB_has_dirty_io if all are empty afterwards.
 */
static void inode_wb_list_del_locked(struct inode *inode,
static void inode_io_list_del_locked(struct inode *inode,
				     struct bdi_writeback *wb)
{
	assert_spin_locked(&wb->list_lock);

	list_del_init(&inode->i_wb_list);
	list_del_init(&inode->i_io_list);
	wb_io_lists_depopulated(wb);
}

@@ -351,7 +351,7 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)

	/*
	 * Once I_FREEING is visible under i_lock, the eviction path owns
	 * the inode and we shouldn't modify ->i_wb_list.
	 * the inode and we shouldn't modify ->i_io_list.
	 */
	if (unlikely(inode->i_state & I_FREEING))
		goto skip_switch;
@@ -390,16 +390,16 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
	 * is always correct including from ->b_dirty_time.  The transfer
	 * preserves @inode->dirtied_when ordering.
	 */
	if (!list_empty(&inode->i_wb_list)) {
	if (!list_empty(&inode->i_io_list)) {
		struct inode *pos;

		inode_wb_list_del_locked(inode, old_wb);
		inode_io_list_del_locked(inode, old_wb);
		inode->i_wb = new_wb;
		list_for_each_entry(pos, &new_wb->b_dirty, i_wb_list)
		list_for_each_entry(pos, &new_wb->b_dirty, i_io_list)
			if (time_after_eq(inode->dirtied_when,
					  pos->dirtied_when))
				break;
		inode_wb_list_move_locked(inode, new_wb, pos->i_wb_list.prev);
		inode_io_list_move_locked(inode, new_wb, pos->i_io_list.prev);
	} else {
		inode->i_wb = new_wb;
	}
@@ -961,12 +961,12 @@ void wb_start_background_writeback(struct bdi_writeback *wb)
/*
 * Remove the inode from the writeback list it is on.
 */
void inode_wb_list_del(struct inode *inode)
void inode_io_list_del(struct inode *inode)
{
	struct bdi_writeback *wb;

	wb = inode_to_wb_and_lock_list(inode);
	inode_wb_list_del_locked(inode, wb);
	inode_io_list_del_locked(inode, wb);
	spin_unlock(&wb->list_lock);
}

@@ -988,7 +988,7 @@ static void redirty_tail(struct inode *inode, struct bdi_writeback *wb)
		if (time_before(inode->dirtied_when, tail->dirtied_when))
			inode->dirtied_when = jiffies;
	}
	inode_wb_list_move_locked(inode, wb, &wb->b_dirty);
	inode_io_list_move_locked(inode, wb, &wb->b_dirty);
}

/*
@@ -996,7 +996,7 @@ static void redirty_tail(struct inode *inode, struct bdi_writeback *wb)
 */
static void requeue_io(struct inode *inode, struct bdi_writeback *wb)
{
	inode_wb_list_move_locked(inode, wb, &wb->b_more_io);
	inode_io_list_move_locked(inode, wb, &wb->b_more_io);
}

static void inode_sync_complete(struct inode *inode)
@@ -1055,7 +1055,7 @@ static int move_expired_inodes(struct list_head *delaying_queue,
		if (older_than_this &&
		    inode_dirtied_after(inode, *older_than_this))
			break;
		list_move(&inode->i_wb_list, &tmp);
		list_move(&inode->i_io_list, &tmp);
		moved++;
		if (flags & EXPIRE_DIRTY_ATIME)
			set_bit(__I_DIRTY_TIME_EXPIRED, &inode->i_state);
@@ -1078,7 +1078,7 @@ static int move_expired_inodes(struct list_head *delaying_queue,
		list_for_each_prev_safe(pos, node, &tmp) {
			inode = wb_inode(pos);
			if (inode->i_sb == sb)
				list_move(&inode->i_wb_list, dispatch_queue);
				list_move(&inode->i_io_list, dispatch_queue);
		}
	}
out:
@@ -1232,10 +1232,10 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
		redirty_tail(inode, wb);
	} else if (inode->i_state & I_DIRTY_TIME) {
		inode->dirtied_when = jiffies;
		inode_wb_list_move_locked(inode, wb, &wb->b_dirty_time);
		inode_io_list_move_locked(inode, wb, &wb->b_dirty_time);
	} else {
		/* The inode is clean. Remove from writeback lists. */
		inode_wb_list_del_locked(inode, wb);
		inode_io_list_del_locked(inode, wb);
	}
}

@@ -1378,7 +1378,7 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
	 * touch it. See comment above for explanation.
	 */
	if (!(inode->i_state & I_DIRTY_ALL))
		inode_wb_list_del_locked(inode, wb);
		inode_io_list_del_locked(inode, wb);
	spin_unlock(&wb->list_lock);
	inode_sync_complete(inode);
out:
@@ -1439,7 +1439,9 @@ static long writeback_sb_inodes(struct super_block *sb,
	unsigned long start_time = jiffies;
	long write_chunk;
	long wrote = 0;  /* count both pages and inodes */
	struct blk_plug plug;

	blk_start_plug(&plug);
	while (!list_empty(&wb->b_io)) {
		struct inode *inode = wb_inode(wb->b_io.prev);

@@ -1537,6 +1539,7 @@ static long writeback_sb_inodes(struct super_block *sb,
				break;
		}
	}
	blk_finish_plug(&plug);
	return wrote;
}

@@ -2088,7 +2091,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
			else
				dirty_list = &wb->b_dirty_time;

			wakeup_bdi = inode_wb_list_move_locked(inode, wb,
			wakeup_bdi = inode_io_list_move_locked(inode, wb,
							       dirty_list);

			spin_unlock(&wb->list_lock);
@@ -2111,6 +2114,15 @@ void __mark_inode_dirty(struct inode *inode, int flags)
}
EXPORT_SYMBOL(__mark_inode_dirty);

/*
 * The @s_sync_lock is used to serialise concurrent sync operations
 * to avoid lock contention problems with concurrent wait_sb_inodes() calls.
 * Concurrent callers will block on the s_sync_lock rather than doing contending
 * walks. The queueing maintains sync(2) required behaviour as all the IO that
 * has been issued up to the time this function is entered is guaranteed to be
 * completed by the time we have gained the lock and waited for all IO that is
 * in progress regardless of the order callers are granted the lock.
 */
static void wait_sb_inodes(struct super_block *sb)
{
	struct inode *inode, *old_inode = NULL;
@@ -2121,7 +2133,8 @@ static void wait_sb_inodes(struct super_block *sb)
	 */
	WARN_ON(!rwsem_is_locked(&sb->s_umount));

	spin_lock(&inode_sb_list_lock);
	mutex_lock(&sb->s_sync_lock);
	spin_lock(&sb->s_inode_list_lock);

	/*
	 * Data integrity sync. Must wait for all pages under writeback,
@@ -2141,14 +2154,14 @@ static void wait_sb_inodes(struct super_block *sb)
		}
		__iget(inode);
		spin_unlock(&inode->i_lock);
		spin_unlock(&inode_sb_list_lock);
		spin_unlock(&sb->s_inode_list_lock);

		/*
		 * We hold a reference to 'inode' so it couldn't have been
		 * removed from s_inodes list while we dropped the
		 * inode_sb_list_lock.  We cannot iput the inode now as we can
		 * s_inode_list_lock.  We cannot iput the inode now as we can
		 * be holding the last reference and we cannot iput it under
		 * inode_sb_list_lock. So we keep the reference and iput it
		 * s_inode_list_lock. So we keep the reference and iput it
		 * later.
		 */
		iput(old_inode);
@@ -2158,10 +2171,11 @@ static void wait_sb_inodes(struct super_block *sb)

		cond_resched();

		spin_lock(&inode_sb_list_lock);
		spin_lock(&sb->s_inode_list_lock);
	}
	spin_unlock(&inode_sb_list_lock);
	spin_unlock(&sb->s_inode_list_lock);
	iput(old_inode);
	mutex_unlock(&sb->s_sync_lock);
}

static void __writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr,
+31 −19
Original line number Diff line number Diff line
@@ -28,16 +28,16 @@
 *   inode->i_state, inode->i_hash, __iget()
 * Inode LRU list locks protect:
 *   inode->i_sb->s_inode_lru, inode->i_lru
 * inode_sb_list_lock protects:
 *   sb->s_inodes, inode->i_sb_list
 * inode->i_sb->s_inode_list_lock protects:
 *   inode->i_sb->s_inodes, inode->i_sb_list
 * bdi->wb.list_lock protects:
 *   bdi->wb.b_{dirty,io,more_io,dirty_time}, inode->i_wb_list
 *   bdi->wb.b_{dirty,io,more_io,dirty_time}, inode->i_io_list
 * inode_hash_lock protects:
 *   inode_hashtable, inode->i_hash
 *
 * Lock ordering:
 *
 * inode_sb_list_lock
 * inode->i_sb->s_inode_list_lock
 *   inode->i_lock
 *     Inode LRU list locks
 *
@@ -45,7 +45,7 @@
 *   inode->i_lock
 *
 * inode_hash_lock
 *   inode_sb_list_lock
 *   inode->i_sb->s_inode_list_lock
 *   inode->i_lock
 *
 * iunique_lock
@@ -57,8 +57,6 @@ static unsigned int i_hash_shift __read_mostly;
static struct hlist_head *inode_hashtable __read_mostly;
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock);

__cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock);

/*
 * Empty aops. Can be used for the cases where the user does not
 * define any of the address_space operations.
@@ -359,7 +357,7 @@ void inode_init_once(struct inode *inode)
	memset(inode, 0, sizeof(*inode));
	INIT_HLIST_NODE(&inode->i_hash);
	INIT_LIST_HEAD(&inode->i_devices);
	INIT_LIST_HEAD(&inode->i_wb_list);
	INIT_LIST_HEAD(&inode->i_io_list);
	INIT_LIST_HEAD(&inode->i_lru);
	address_space_init_once(&inode->i_data);
	i_size_ordered_init(inode);
@@ -426,18 +424,18 @@ static void inode_lru_list_del(struct inode *inode)
 */
void inode_sb_list_add(struct inode *inode)
{
	spin_lock(&inode_sb_list_lock);
	spin_lock(&inode->i_sb->s_inode_list_lock);
	list_add(&inode->i_sb_list, &inode->i_sb->s_inodes);
	spin_unlock(&inode_sb_list_lock);
	spin_unlock(&inode->i_sb->s_inode_list_lock);
}
EXPORT_SYMBOL_GPL(inode_sb_list_add);

static inline void inode_sb_list_del(struct inode *inode)
{
	if (!list_empty(&inode->i_sb_list)) {
		spin_lock(&inode_sb_list_lock);
		spin_lock(&inode->i_sb->s_inode_list_lock);
		list_del_init(&inode->i_sb_list);
		spin_unlock(&inode_sb_list_lock);
		spin_unlock(&inode->i_sb->s_inode_list_lock);
	}
}

@@ -527,8 +525,8 @@ static void evict(struct inode *inode)
	BUG_ON(!(inode->i_state & I_FREEING));
	BUG_ON(!list_empty(&inode->i_lru));

	if (!list_empty(&inode->i_wb_list))
		inode_wb_list_del(inode);
	if (!list_empty(&inode->i_io_list))
		inode_io_list_del(inode);

	inode_sb_list_del(inode);

@@ -577,6 +575,7 @@ static void dispose_list(struct list_head *head)
		list_del_init(&inode->i_lru);

		evict(inode);
		cond_resched();
	}
}

@@ -594,7 +593,8 @@ void evict_inodes(struct super_block *sb)
	struct inode *inode, *next;
	LIST_HEAD(dispose);

	spin_lock(&inode_sb_list_lock);
again:
	spin_lock(&sb->s_inode_list_lock);
	list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
		if (atomic_read(&inode->i_count))
			continue;
@@ -609,8 +609,20 @@ void evict_inodes(struct super_block *sb)
		inode_lru_list_del(inode);
		spin_unlock(&inode->i_lock);
		list_add(&inode->i_lru, &dispose);

		/*
		 * We can have a ton of inodes to evict at unmount time given
		 * enough memory, check to see if we need to go to sleep for a
		 * bit so we don't livelock.
		 */
		if (need_resched()) {
			spin_unlock(&sb->s_inode_list_lock);
			cond_resched();
			dispose_list(&dispose);
			goto again;
		}
	}
	spin_unlock(&inode_sb_list_lock);
	spin_unlock(&sb->s_inode_list_lock);

	dispose_list(&dispose);
}
@@ -631,7 +643,7 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty)
	struct inode *inode, *next;
	LIST_HEAD(dispose);

	spin_lock(&inode_sb_list_lock);
	spin_lock(&sb->s_inode_list_lock);
	list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
		spin_lock(&inode->i_lock);
		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
@@ -654,7 +666,7 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty)
		spin_unlock(&inode->i_lock);
		list_add(&inode->i_lru, &dispose);
	}
	spin_unlock(&inode_sb_list_lock);
	spin_unlock(&sb->s_inode_list_lock);

	dispose_list(&dispose);

@@ -890,7 +902,7 @@ struct inode *new_inode(struct super_block *sb)
{
	struct inode *inode;

	spin_lock_prefetch(&inode_sb_list_lock);
	spin_lock_prefetch(&sb->s_inode_list_lock);

	inode = new_inode_pseudo(sb);
	if (inode)
+1 −2
Original line number Diff line number Diff line
@@ -112,14 +112,13 @@ extern int vfs_open(const struct path *, struct file *, const struct cred *);
/*
 * inode.c
 */
extern spinlock_t inode_sb_list_lock;
extern long prune_icache_sb(struct super_block *sb, struct shrink_control *sc);
extern void inode_add_lru(struct inode *inode);

/*
 * fs-writeback.c
 */
extern void inode_wb_list_del(struct inode *inode);
extern void inode_io_list_del(struct inode *inode);

extern long get_nr_dirty_inodes(void);
extern void evict_inodes(struct super_block *);
Loading