Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit e176579e authored by Dave Chinner, committed by Alex Elder
Browse files

xfs: lockless per-ag lookups



When we start taking a reference to the per-ag for every cached
buffer in the system, kernel lockstat profiling on an 8-way create
workload shows the mp->m_perag_lock has higher acquisition rates
than the inode lock and has significantly more contention. That is,
it becomes the highest contended lock in the system.

The perag lookup is trivial to convert to lock-less RCU lookups
because perag structures never go away. Hence the only thing we need
to protect against is tree structure changes during a grow. This can
be done simply by replacing the locking in xfs_perag_get() with RCU
read locking. This removes the mp->m_perag_lock completely from this
path.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Alex Elder <aelder@sgi.com>
parent bd32d25a
Loading
Loading
Loading
Loading
+3 −3
Original line number Original line Diff line number Diff line
@@ -150,17 +150,17 @@ xfs_inode_ag_iter_next_pag(
		int found;
		int found;
		int ref;
		int ref;


		spin_lock(&mp->m_perag_lock);
		rcu_read_lock();
		found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
		found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
				(void **)&pag, *first, 1, tag);
				(void **)&pag, *first, 1, tag);
		if (found <= 0) {
		if (found <= 0) {
			spin_unlock(&mp->m_perag_lock);
			rcu_read_unlock();
			return NULL;
			return NULL;
		}
		}
		*first = pag->pag_agno + 1;
		*first = pag->pag_agno + 1;
		/* open coded pag reference increment */
		/* open coded pag reference increment */
		ref = atomic_inc_return(&pag->pag_ref);
		ref = atomic_inc_return(&pag->pag_ref);
		spin_unlock(&mp->m_perag_lock);
		rcu_read_unlock();
		trace_xfs_perag_get_reclaim(mp, pag->pag_agno, ref, _RET_IP_);
		trace_xfs_perag_get_reclaim(mp, pag->pag_agno, ref, _RET_IP_);
	} else {
	} else {
		pag = xfs_perag_get(mp, *first);
		pag = xfs_perag_get(mp, *first);
+3 −0
Original line number Original line Diff line number Diff line
@@ -230,6 +230,9 @@ typedef struct xfs_perag {
	rwlock_t	pag_ici_lock;	/* incore inode lock */
	rwlock_t	pag_ici_lock;	/* incore inode lock */
	struct radix_tree_root pag_ici_root;	/* incore inode cache root */
	struct radix_tree_root pag_ici_root;	/* incore inode cache root */
	int		pag_ici_reclaimable;	/* reclaimable inodes */
	int		pag_ici_reclaimable;	/* reclaimable inodes */

	/* for rcu-safe freeing */
	struct rcu_head	rcu_head;
#endif
#endif
	int		pagb_count;	/* pagb slots in use */
	int		pagb_count;	/* pagb slots in use */
} xfs_perag_t;
} xfs_perag_t;
+17 −8
Original line number Original line Diff line number Diff line
@@ -199,6 +199,8 @@ xfs_uuid_unmount(


/*
/*
 * Reference counting access wrappers to the perag structures.
 * Reference counting access wrappers to the perag structures.
 * Because we never free per-ag structures, the only thing we
 * have to protect against changes is the tree structure itself.
 */
 */
struct xfs_perag *
struct xfs_perag *
xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno)
xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno)
@@ -206,13 +208,13 @@ xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno)
	struct xfs_perag	*pag;
	struct xfs_perag	*pag;
	int			ref = 0;
	int			ref = 0;


	spin_lock(&mp->m_perag_lock);
	rcu_read_lock();
	pag = radix_tree_lookup(&mp->m_perag_tree, agno);
	pag = radix_tree_lookup(&mp->m_perag_tree, agno);
	if (pag) {
	if (pag) {
		ASSERT(atomic_read(&pag->pag_ref) >= 0);
		ASSERT(atomic_read(&pag->pag_ref) >= 0);
		ref = atomic_inc_return(&pag->pag_ref);
		ref = atomic_inc_return(&pag->pag_ref);
	}
	}
	spin_unlock(&mp->m_perag_lock);
	rcu_read_unlock();
	trace_xfs_perag_get(mp, agno, ref, _RET_IP_);
	trace_xfs_perag_get(mp, agno, ref, _RET_IP_);
	return pag;
	return pag;
}
}
@@ -227,10 +229,18 @@ xfs_perag_put(struct xfs_perag *pag)
	trace_xfs_perag_put(pag->pag_mount, pag->pag_agno, ref, _RET_IP_);
	trace_xfs_perag_put(pag->pag_mount, pag->pag_agno, ref, _RET_IP_);
}
}


/*
 * Callback handed to call_rcu() by xfs_free_perag(). Recovers the
 * per-ag structure that embeds @head, asserts that its reference count
 * has dropped to zero, and then frees it.
 */
STATIC void
__xfs_free_perag(
	struct rcu_head	*head)
{
	struct xfs_perag *perag;

	/* @head is the rcu_head member embedded in the xfs_perag. */
	perag = container_of(head, struct xfs_perag, rcu_head);

	ASSERT(atomic_read(&perag->pag_ref) == 0);
	kmem_free(perag);
}

/*
/*
 * Free up the resources associated with a mount structure.  Assume that
 * Free up the per-ag resources associated with the mount structure.
 * the structure was initially zeroed, so we can tell which fields got
 * initialized.
 */
 */
STATIC void
STATIC void
xfs_free_perag(
xfs_free_perag(
@@ -242,10 +252,9 @@ xfs_free_perag(
	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
		spin_lock(&mp->m_perag_lock);
		spin_lock(&mp->m_perag_lock);
		pag = radix_tree_delete(&mp->m_perag_tree, agno);
		pag = radix_tree_delete(&mp->m_perag_tree, agno);
		ASSERT(pag);
		ASSERT(atomic_read(&pag->pag_ref) == 0);
		spin_unlock(&mp->m_perag_lock);
		spin_unlock(&mp->m_perag_lock);
		kmem_free(pag);
		ASSERT(pag);
		call_rcu(&pag->rcu_head, __xfs_free_perag);
	}
	}
}
}