Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 9bf729c0 authored by Dave Chinner's avatar Dave Chinner Committed by Alex Elder
Browse files

xfs: add a shrinker to background inode reclaim



On low memory boxes or those with highmem, kernel can OOM before the
background reclaims inodes via xfssyncd. Add a shrinker to run inode
reclaim so that inode reclaim is expedited when memory is low.

This is more complex than it needs to be because the VM folk don't
want a context added to the shrinker infrastructure. Hence we need
to add a global list of XFS mount structures so the shrinker can
traverse them.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
parent 79dba2ea
Loading
Loading
Loading
Loading
+5 −0
Original line number Original line Diff line number Diff line
@@ -1209,6 +1209,7 @@ xfs_fs_put_super(


	xfs_unmountfs(mp);
	xfs_unmountfs(mp);
	xfs_freesb(mp);
	xfs_freesb(mp);
	xfs_inode_shrinker_unregister(mp);
	xfs_icsb_destroy_counters(mp);
	xfs_icsb_destroy_counters(mp);
	xfs_close_devices(mp);
	xfs_close_devices(mp);
	xfs_dmops_put(mp);
	xfs_dmops_put(mp);
@@ -1622,6 +1623,8 @@ xfs_fs_fill_super(
	if (error)
	if (error)
		goto fail_vnrele;
		goto fail_vnrele;


	xfs_inode_shrinker_register(mp);

	kfree(mtpt);
	kfree(mtpt);
	return 0;
	return 0;


@@ -1867,6 +1870,7 @@ init_xfs_fs(void)
		goto out_cleanup_procfs;
		goto out_cleanup_procfs;


	vfs_initquota();
	vfs_initquota();
	xfs_inode_shrinker_init();


	error = register_filesystem(&xfs_fs_type);
	error = register_filesystem(&xfs_fs_type);
	if (error)
	if (error)
@@ -1894,6 +1898,7 @@ exit_xfs_fs(void)
{
{
	vfs_exitquota();
	vfs_exitquota();
	unregister_filesystem(&xfs_fs_type);
	unregister_filesystem(&xfs_fs_type);
	xfs_inode_shrinker_destroy();
	xfs_sysctl_unregister();
	xfs_sysctl_unregister();
	xfs_cleanup_procfs();
	xfs_cleanup_procfs();
	xfs_buf_terminate();
	xfs_buf_terminate();
+105 −7
Original line number Original line Diff line number Diff line
@@ -95,7 +95,8 @@ xfs_inode_ag_walk(
					   struct xfs_perag *pag, int flags),
					   struct xfs_perag *pag, int flags),
	int			flags,
	int			flags,
	int			tag,
	int			tag,
	int			exclusive)
	int			exclusive,
	int			*nr_to_scan)
{
{
	uint32_t		first_index;
	uint32_t		first_index;
	int			last_error = 0;
	int			last_error = 0;
@@ -134,7 +135,7 @@ xfs_inode_ag_walk(
		if (error == EFSCORRUPTED)
		if (error == EFSCORRUPTED)
			break;
			break;


	} while (1);
	} while ((*nr_to_scan)--);


	if (skipped) {
	if (skipped) {
		delay(1);
		delay(1);
@@ -150,12 +151,15 @@ xfs_inode_ag_iterator(
					   struct xfs_perag *pag, int flags),
					   struct xfs_perag *pag, int flags),
	int			flags,
	int			flags,
	int			tag,
	int			tag,
	int			exclusive)
	int			exclusive,
	int			*nr_to_scan)
{
{
	int			error = 0;
	int			error = 0;
	int			last_error = 0;
	int			last_error = 0;
	xfs_agnumber_t		ag;
	xfs_agnumber_t		ag;
	int			nr;


	nr = nr_to_scan ? *nr_to_scan : INT_MAX;
	for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
	for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
		struct xfs_perag	*pag;
		struct xfs_perag	*pag;


@@ -165,14 +169,18 @@ xfs_inode_ag_iterator(
			continue;
			continue;
		}
		}
		error = xfs_inode_ag_walk(mp, pag, execute, flags, tag,
		error = xfs_inode_ag_walk(mp, pag, execute, flags, tag,
						exclusive);
						exclusive, &nr);
		xfs_perag_put(pag);
		xfs_perag_put(pag);
		if (error) {
		if (error) {
			last_error = error;
			last_error = error;
			if (error == EFSCORRUPTED)
			if (error == EFSCORRUPTED)
				break;
				break;
		}
		}
		if (nr <= 0)
			break;
	}
	}
	if (nr_to_scan)
		*nr_to_scan = nr;
	return XFS_ERROR(last_error);
	return XFS_ERROR(last_error);
}
}


@@ -291,7 +299,7 @@ xfs_sync_data(
	ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
	ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);


	error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags,
	error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags,
				      XFS_ICI_NO_TAG, 0);
				      XFS_ICI_NO_TAG, 0, NULL);
	if (error)
	if (error)
		return XFS_ERROR(error);
		return XFS_ERROR(error);


@@ -310,7 +318,7 @@ xfs_sync_attr(
	ASSERT((flags & ~SYNC_WAIT) == 0);
	ASSERT((flags & ~SYNC_WAIT) == 0);


	return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags,
	return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags,
				     XFS_ICI_NO_TAG, 0);
				     XFS_ICI_NO_TAG, 0, NULL);
}
}


STATIC int
STATIC int
@@ -673,6 +681,7 @@ __xfs_inode_set_reclaim_tag(
	radix_tree_tag_set(&pag->pag_ici_root,
	radix_tree_tag_set(&pag->pag_ici_root,
			   XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
			   XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
			   XFS_ICI_RECLAIM_TAG);
			   XFS_ICI_RECLAIM_TAG);
	pag->pag_ici_reclaimable++;
}
}


/*
/*
@@ -705,6 +714,7 @@ __xfs_inode_clear_reclaim_tag(
{
{
	radix_tree_tag_clear(&pag->pag_ici_root,
	radix_tree_tag_clear(&pag->pag_ici_root,
			XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
			XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
	pag->pag_ici_reclaimable--;
}
}


/*
/*
@@ -854,5 +864,93 @@ xfs_reclaim_inodes(
	int		mode)
	int		mode)
{
{
	return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode,
	return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode,
					XFS_ICI_RECLAIM_TAG, 1);
					XFS_ICI_RECLAIM_TAG, 1, NULL);
}

/*
 * Shrinker infrastructure.
 *
 * This is all far more complex than it needs to be. It adds a global list of
 * mounts because the shrinkers can only call a global context. We need to make
 * the shrinkers pass a context to avoid the need for global state.
 */
/* Global list of XFS mounts, linked through xfs_mount.m_mplist. */
static LIST_HEAD(xfs_mount_list);
/*
 * Guards xfs_mount_list: taken for read while the shrinker walks the list,
 * for write when a mount is added or removed. Initialised at run time by
 * xfs_inode_shrinker_init().
 */
static struct rw_semaphore xfs_mount_list_lock;

/*
 * Shrinker callback for reclaimable XFS inodes.
 *
 * When nr_to_scan is non-zero, run xfs_reclaim_inode over the inodes tagged
 * XFS_ICI_RECLAIM_TAG on each listed mount in turn, stopping once the scan
 * budget has been used up. When nr_to_scan is zero no reclaim is attempted.
 *
 * Returns -1 if a scan was requested but gfp_mask lacks __GFP_FS (presumably
 * because the allocation context must not re-enter filesystem code - confirm
 * against the shrinker API contract); otherwise returns the sum of
 * pag_ici_reclaimable over every initialised AG of every listed mount.
 */
static int
xfs_reclaim_inode_shrink(
	int		nr_to_scan,
	gfp_t		gfp_mask)
{
	struct xfs_mount *mount;
	xfs_agnumber_t	agno;
	int		total = 0;

	/* Phase 1: optional reclaim pass, bounded by nr_to_scan. */
	if (nr_to_scan != 0) {
		if ((gfp_mask & __GFP_FS) == 0)
			return -1;

		down_read(&xfs_mount_list_lock);
		list_for_each_entry(mount, &xfs_mount_list, m_mplist) {
			/* The iterator writes the remaining budget back. */
			xfs_inode_ag_iterator(mount, xfs_reclaim_inode, 0,
					XFS_ICI_RECLAIM_TAG, 1, &nr_to_scan);
			if (nr_to_scan <= 0)
				break;
		}
		up_read(&xfs_mount_list_lock);
	}

	/* Phase 2: report how many inodes are currently reclaimable. */
	down_read(&xfs_mount_list_lock);
	list_for_each_entry(mount, &xfs_mount_list, m_mplist) {
		for (agno = 0; agno < mount->m_sb.sb_agcount; agno++) {
			struct xfs_perag *pag = xfs_perag_get(mount, agno);

			/* AGs without an initialised inode cache add nothing. */
			if (pag->pag_ici_init)
				total += pag->pag_ici_reclaimable;
			xfs_perag_put(pag);
		}
	}
	up_read(&xfs_mount_list_lock);

	return total;
}

/*
 * Single, filesystem-wide shrinker descriptor: all mounts share one callback,
 * which finds them through xfs_mount_list.
 */
static struct shrinker xfs_inode_shrinker = {
	.seeks	= DEFAULT_SEEKS,
	.shrink	= xfs_reclaim_inode_shrink,
};

/*
 * Initialise the mount-list lock and register the XFS inode shrinker.
 * The lock is initialised first because the shrink callback takes it.
 */
void __init
xfs_inode_shrinker_init(void)
{
	init_rwsem(&xfs_mount_list_lock);
	register_shrinker(&xfs_inode_shrinker);
}

/*
 * Unregister the XFS inode shrinker. Asserts that the mount list is already
 * empty, i.e. every mount has been removed via
 * xfs_inode_shrinker_unregister().
 */
void
xfs_inode_shrinker_destroy(void)
{
	ASSERT(list_empty(&xfs_mount_list));
	unregister_shrinker(&xfs_inode_shrinker);
}

/*
 * Append @mp to the global mount list so the inode shrinker will visit it.
 * Takes xfs_mount_list_lock for write around the list update.
 */
void
xfs_inode_shrinker_register(
	struct xfs_mount	*mp)
{
	down_write(&xfs_mount_list_lock);
	list_add_tail(&mp->m_mplist, &xfs_mount_list);
	up_write(&xfs_mount_list_lock);
}

/*
 * Remove @mp from the global mount list so the inode shrinker no longer
 * visits it. Takes xfs_mount_list_lock for write around the list update.
 *
 * NOTE(review): the shrinker dereferences the superblock AG count and per-AG
 * structures of every listed mount, so this presumably needs to run before
 * those are torn down at unmount - confirm the call ordering in the caller.
 */
void
xfs_inode_shrinker_unregister(
	struct xfs_mount	*mp)
{
	down_write(&xfs_mount_list_lock);
	list_del(&mp->m_mplist);
	up_write(&xfs_mount_list_lock);
}
}
+6 −1
Original line number Original line Diff line number Diff line
@@ -53,6 +53,11 @@ void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag);
int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag);
int xfs_inode_ag_iterator(struct xfs_mount *mp,
int xfs_inode_ag_iterator(struct xfs_mount *mp,
	int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
	int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
	int flags, int tag, int write_lock);
	int flags, int tag, int write_lock, int *nr_to_scan);

/*
 * Inode reclaim shrinker: init/destroy register and unregister the single
 * global shrinker; register/unregister add and remove one mount from the
 * list that shrinker walks.
 */
void xfs_inode_shrinker_init(void);
void xfs_inode_shrinker_destroy(void);
void xfs_inode_shrinker_register(struct xfs_mount *mp);
void xfs_inode_shrinker_unregister(struct xfs_mount *mp);


#endif
#endif
+2 −1
Original line number Original line Diff line number Diff line
@@ -891,7 +891,8 @@ xfs_qm_dqrele_all_inodes(
	uint		 flags)
	uint		 flags)
{
{
	ASSERT(mp->m_quotainfo);
	ASSERT(mp->m_quotainfo);
	xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, XFS_ICI_NO_TAG, 0);
	xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags,
				XFS_ICI_NO_TAG, 0, NULL);
}
}


/*------------------------------------------------------------------------*/
/*------------------------------------------------------------------------*/
+1 −0
Original line number Original line Diff line number Diff line
@@ -223,6 +223,7 @@ typedef struct xfs_perag {
	int		pag_ici_init;	/* incore inode cache initialised */
	int		pag_ici_init;	/* incore inode cache initialised */
	rwlock_t	pag_ici_lock;	/* incore inode lock */
	rwlock_t	pag_ici_lock;	/* incore inode lock */
	struct radix_tree_root pag_ici_root;	/* incore inode cache root */
	struct radix_tree_root pag_ici_root;	/* incore inode cache root */
	int		pag_ici_reclaimable;	/* reclaimable inodes */
#endif
#endif
	int		pagb_count;	/* pagb slots in use */
	int		pagb_count;	/* pagb slots in use */
	xfs_perag_busy_t pagb_list[XFS_PAGB_NUM_SLOTS];	/* unstable blocks */
	xfs_perag_busy_t pagb_list[XFS_PAGB_NUM_SLOTS];	/* unstable blocks */
Loading