Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 98b745c6 authored by Dave Chinner's avatar Dave Chinner Committed by Al Viro
Browse files

inode: Make unused inode LRU per superblock



The inode unused list is currently a global LRU. This does not match
the other global filesystem cache - the dentry cache - which uses
per-superblock LRU lists. Hence we have related filesystem object
types using different LRU reclaimation schemes.

To enable a per-superblock filesystem cache shrinker, both of these
caches need to have per-sb unused object LRU lists. Hence this patch
converts the global inode LRU to per-sb LRUs.

The patch only does rudimentary per-sb propotioning in the shrinker
infrastructure, as this gets removed when the per-sb shrinker
callouts are introduced later on.

Signed-off-by: default avatarDave Chinner <dchinner@redhat.com>
Signed-off-by: default avatarAl Viro <viro@zeniv.linux.org.uk>
parent fcb94f72
Loading
Loading
Loading
Loading
+80 −11
Original line number Diff line number Diff line
@@ -34,7 +34,7 @@
 * inode->i_lock protects:
 *   inode->i_state, inode->i_hash, __iget()
 * inode_lru_lock protects:
 *   inode_lru, inode->i_lru
 *   inode->i_sb->s_inode_lru, inode->i_lru
 * inode_sb_list_lock protects:
 *   sb->s_inodes, inode->i_sb_list
 * inode_wb_list_lock protects:
@@ -64,7 +64,6 @@ static unsigned int i_hash_shift __read_mostly;
static struct hlist_head *inode_hashtable __read_mostly;
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock);

static LIST_HEAD(inode_lru);
static DEFINE_SPINLOCK(inode_lru_lock);

__cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock);
@@ -345,7 +344,8 @@ static void inode_lru_list_add(struct inode *inode)
{
	spin_lock(&inode_lru_lock);
	if (list_empty(&inode->i_lru)) {
		list_add(&inode->i_lru, &inode_lru);
		list_add(&inode->i_lru, &inode->i_sb->s_inode_lru);
		inode->i_sb->s_nr_inodes_unused++;
		this_cpu_inc(nr_unused);
	}
	spin_unlock(&inode_lru_lock);
@@ -356,6 +356,7 @@ static void inode_lru_list_del(struct inode *inode)
	spin_lock(&inode_lru_lock);
	if (!list_empty(&inode->i_lru)) {
		list_del_init(&inode->i_lru);
		inode->i_sb->s_nr_inodes_unused--;
		this_cpu_dec(nr_unused);
	}
	spin_unlock(&inode_lru_lock);
@@ -628,21 +629,20 @@ static int can_unuse(struct inode *inode)
 * LRU does not have strict ordering. Hence we don't want to reclaim inodes
 * with this flag set because they are the inodes that are out of order.
 */
static void prune_icache(int nr_to_scan)
static void shrink_icache_sb(struct super_block *sb, int *nr_to_scan)
{
	LIST_HEAD(freeable);
	int nr_scanned;
	unsigned long reap = 0;

	down_read(&iprune_sem);
	spin_lock(&inode_lru_lock);
	for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) {
	for (nr_scanned = *nr_to_scan; nr_scanned >= 0; nr_scanned--) {
		struct inode *inode;

		if (list_empty(&inode_lru))
		if (list_empty(&sb->s_inode_lru))
			break;

		inode = list_entry(inode_lru.prev, struct inode, i_lru);
		inode = list_entry(sb->s_inode_lru.prev, struct inode, i_lru);

		/*
		 * we are inverting the inode_lru_lock/inode->i_lock here,
@@ -650,7 +650,7 @@ static void prune_icache(int nr_to_scan)
		 * inode to the back of the list so we don't spin on it.
		 */
		if (!spin_trylock(&inode->i_lock)) {
			list_move(&inode->i_lru, &inode_lru);
			list_move(&inode->i_lru, &sb->s_inode_lru);
			continue;
		}

@@ -662,6 +662,7 @@ static void prune_icache(int nr_to_scan)
		    (inode->i_state & ~I_REFERENCED)) {
			list_del_init(&inode->i_lru);
			spin_unlock(&inode->i_lock);
			sb->s_nr_inodes_unused--;
			this_cpu_dec(nr_unused);
			continue;
		}
@@ -669,7 +670,7 @@ static void prune_icache(int nr_to_scan)
		/* recently referenced inodes get one more pass */
		if (inode->i_state & I_REFERENCED) {
			inode->i_state &= ~I_REFERENCED;
			list_move(&inode->i_lru, &inode_lru);
			list_move(&inode->i_lru, &sb->s_inode_lru);
			spin_unlock(&inode->i_lock);
			continue;
		}
@@ -683,7 +684,7 @@ static void prune_icache(int nr_to_scan)
			iput(inode);
			spin_lock(&inode_lru_lock);

			if (inode != list_entry(inode_lru.next,
			if (inode != list_entry(sb->s_inode_lru.next,
						struct inode, i_lru))
				continue;	/* wrong inode or list_empty */
			/* avoid lock inversions with trylock */
@@ -699,6 +700,7 @@ static void prune_icache(int nr_to_scan)
		spin_unlock(&inode->i_lock);

		list_move(&inode->i_lru, &freeable);
		sb->s_nr_inodes_unused--;
		this_cpu_dec(nr_unused);
	}
	if (current_is_kswapd())
@@ -706,8 +708,75 @@ static void prune_icache(int nr_to_scan)
	else
		__count_vm_events(PGINODESTEAL, reap);
	spin_unlock(&inode_lru_lock);
	*nr_to_scan = nr_scanned;

	dispose_list(&freeable);
}

static void prune_icache(int count)
{
	struct super_block *sb, *p = NULL;
	int w_count;
	int unused = inodes_stat.nr_unused;
	int prune_ratio;
	int pruned;

	if (unused == 0 || count == 0)
		return;
	down_read(&iprune_sem);
	if (count >= unused)
		prune_ratio = 1;
	else
		prune_ratio = unused / count;
	spin_lock(&sb_lock);
	list_for_each_entry(sb, &super_blocks, s_list) {
		if (list_empty(&sb->s_instances))
			continue;
		if (sb->s_nr_inodes_unused == 0)
			continue;
		sb->s_count++;
		/* Now, we reclaim unused dentrins with fairness.
		 * We reclaim them same percentage from each superblock.
		 * We calculate number of dentries to scan on this sb
		 * as follows, but the implementation is arranged to avoid
		 * overflows:
		 * number of dentries to scan on this sb =
		 * count * (number of dentries on this sb /
		 * number of dentries in the machine)
		 */
		spin_unlock(&sb_lock);
		if (prune_ratio != 1)
			w_count = (sb->s_nr_inodes_unused / prune_ratio) + 1;
		else
			w_count = sb->s_nr_inodes_unused;
		pruned = w_count;
		/*
		 * We need to be sure this filesystem isn't being unmounted,
		 * otherwise we could race with generic_shutdown_super(), and
		 * end up holding a reference to an inode while the filesystem
		 * is unmounted.  So we try to get s_umount, and make sure
		 * s_root isn't NULL.
		 */
		if (down_read_trylock(&sb->s_umount)) {
			if ((sb->s_root != NULL) &&
			    (!list_empty(&sb->s_dentry_lru))) {
				shrink_icache_sb(sb, &w_count);
				pruned -= w_count;
			}
			up_read(&sb->s_umount);
		}
		spin_lock(&sb_lock);
		if (p)
			__put_super(p);
		count -= pruned;
		p = sb;
		/* more work left to do? */
		if (count <= 0)
			break;
	}
	if (p)
		__put_super(p);
	spin_unlock(&sb_lock);
	up_read(&iprune_sem);
}

+1 −0
Original line number Diff line number Diff line
@@ -77,6 +77,7 @@ static struct super_block *alloc_super(struct file_system_type *type)
		INIT_HLIST_BL_HEAD(&s->s_anon);
		INIT_LIST_HEAD(&s->s_inodes);
		INIT_LIST_HEAD(&s->s_dentry_lru);
		INIT_LIST_HEAD(&s->s_inode_lru);
		init_rwsem(&s->s_umount);
		mutex_init(&s->s_lock);
		lockdep_set_class(&s->s_umount, &type->s_umount_key);
+4 −0
Original line number Diff line number Diff line
@@ -1397,6 +1397,10 @@ struct super_block {
	struct list_head	s_dentry_lru;	/* unused dentry lru */
	int			s_nr_dentry_unused;	/* # of dentry on lru */

	/* inode_lru_lock protects s_inode_lru and s_nr_inodes_unused */
	struct list_head	s_inode_lru;		/* unused inode lru */
	int			s_nr_inodes_unused;	/* # of inodes on lru */

	struct block_device	*s_bdev;
	struct backing_dev_info *s_bdi;
	struct mtd_info		*s_mtd;