Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 9f920f11 authored by Christoph Hellwig's avatar Christoph Hellwig Committed by Ben Myers
Browse files

xfs: use per-filesystem radix trees for dquot lookup



Replace the global hash tables for looking up in-memory dquot structures
with per-filesystem radix trees to allow scaling to a large number of
in-memory dquot structures.

Reviewed-by: default avatarDave Chinner <dchinner@redhat.com>
Signed-off-by: default avatarChristoph Hellwig <hch@lst.de>
Signed-off-by: default avatarBen Myers <bpm@sgi.com>
parent f8739c3c
Loading
Loading
Loading
Loading
+48 −140
Original line number Diff line number Diff line
@@ -43,7 +43,7 @@
 * Lock order:
 *
 * ip->i_lock
 *   qh->qh_lock
 *   qi->qi_tree_lock
 *     qi->qi_dqlist_lock
 *       dquot->q_qlock (xfs_dqlock() and friends)
 *         dquot->q_flush (xfs_dqflock() and friends)
@@ -601,60 +601,6 @@ error0:
	return error;
}

/*
 * Lookup a dquot in the incore dquot hashtable. We keep two separate
 * hashtables for user and group dquots; and, these are global tables
 * inside the XQM, not per-filesystem tables.
 * The hash chain must be locked by caller, and it is left locked
 * on return. Returning dquot is locked.
 */
STATIC int
xfs_qm_dqlookup(
	xfs_mount_t		*mp,
	xfs_dqid_t		id,
	xfs_dqhash_t		*qh,
	xfs_dquot_t		**O_dqpp)
{
	xfs_dquot_t		*dqp;

	ASSERT(mutex_is_locked(&qh->qh_lock));

	/*
	 * Traverse the hashchain looking for a match
	 */
	list_for_each_entry(dqp, &qh->qh_list, q_hashlist) {
		/*
		 * We already have the hashlock. We don't need the
		 * dqlock to look at the id field of the dquot, since the
		 * id can't be modified without the hashlock anyway.
		 */
		if (be32_to_cpu(dqp->q_core.d_id) != id || dqp->q_mount != mp)
			continue;

		trace_xfs_dqlookup_found(dqp);

		xfs_dqlock(dqp);
		if (dqp->dq_flags & XFS_DQ_FREEING) {
			*O_dqpp = NULL;
			xfs_dqunlock(dqp);
			return -1;
		}

		dqp->q_nrefs++;

		/*
		 * move the dquot to the front of the hashchain
		 */
		list_move(&dqp->q_hashlist, &qh->qh_list);
		trace_xfs_dqlookup_done(dqp);
		*O_dqpp = dqp;
		return 0;
	}

	*O_dqpp = NULL;
	return 1;
}

/*
 * Given the file system, inode OR id, and type (UDQUOT/GDQUOT), return a
 * a locked dquot, doing an allocation (if requested) as needed.
@@ -672,9 +618,9 @@ xfs_qm_dqget(
	uint		flags,	  /* DQALLOC, DQSUSER, DQREPAIR, DOWARN */
	xfs_dquot_t	**O_dqpp) /* OUT : locked incore dquot */
{
	xfs_dquot_t	*dqp, *dqp1;
	xfs_dqhash_t	*h;
	uint		version;
	struct xfs_quotainfo	*qi = mp->m_quotainfo;
	struct radix_tree_root *tree = XFS_DQUOT_TREE(qi, type);
	struct xfs_dquot	*dqp;
	int			error;

	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
@@ -683,7 +629,6 @@ xfs_qm_dqget(
	    (! XFS_IS_GQUOTA_ON(mp) && type == XFS_DQ_GROUP)) {
		return (ESRCH);
	}
	h = XFS_DQ_HASH(mp, id, type);

#ifdef DEBUG
	if (xfs_do_dqerror) {
@@ -704,34 +649,28 @@ xfs_qm_dqget(
#endif

restart:
	mutex_lock(&h->qh_lock);

	/*
	 * Look in the cache (hashtable).
	 * The chain is kept locked during lookup.
	 */
	switch (xfs_qm_dqlookup(mp, id, h, O_dqpp)) {
	case -1:
		XFS_STATS_INC(xs_qm_dquot_dups);
		mutex_unlock(&h->qh_lock);
	mutex_lock(&qi->qi_tree_lock);
	dqp = radix_tree_lookup(tree, id);
	if (dqp) {
		xfs_dqlock(dqp);
		if (dqp->dq_flags & XFS_DQ_FREEING) {
			xfs_dqunlock(dqp);
			mutex_unlock(&qi->qi_tree_lock);
			trace_xfs_dqget_freeing(dqp);
			delay(1);
			goto restart;
	case 0:
		}

		dqp->q_nrefs++;
		mutex_unlock(&qi->qi_tree_lock);

		trace_xfs_dqget_hit(dqp);
		XFS_STATS_INC(xs_qm_dqcachehits);
		/*
		 * The dquot was found, moved to the front of the chain,
		 * taken off the freelist if it was on it, and locked
		 * at this point. Just unlock the hashchain and return.
		 */
		ASSERT(*O_dqpp);
		ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp));
		mutex_unlock(&h->qh_lock);
		trace_xfs_dqget_hit(*O_dqpp);
		return 0;	/* success */
	default:
		XFS_STATS_INC(xs_qm_dqcachemisses);
		break;
		*O_dqpp = dqp;
		return 0;
	}
	mutex_unlock(&qi->qi_tree_lock);
	XFS_STATS_INC(xs_qm_dqcachemisses);

	/*
	 * Dquot cache miss. We don't want to keep the inode lock across
@@ -742,12 +681,6 @@ restart:
	 */
	if (ip)
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
	/*
	 * Save the hashchain version stamp, and unlock the chain, so that
	 * we don't keep the lock across a disk read
	 */
	version = h->qh_version;
	mutex_unlock(&h->qh_lock);

	error = xfs_qm_dqread(mp, id, type, flags, &dqp);

@@ -757,15 +690,14 @@ restart:
	if (error)
		return error;

	/*
	 * Dquot lock comes after hashlock in the lock ordering
	 */
	if (ip) {
		/*
		 * A dquot could be attached to this inode by now, since
		 * we had dropped the ilock.
		 */
		if (xfs_this_quota_on(mp, type)) {
			struct xfs_dquot	*dqp1;

			dqp1 = xfs_inode_dquot(ip, type);
			if (dqp1) {
				xfs_qm_dqdestroy(dqp);
@@ -780,51 +712,27 @@ restart:
		}
	}

	mutex_lock(&qi->qi_tree_lock);
	error = -radix_tree_insert(tree, id, dqp);
	if (unlikely(error)) {
		WARN_ON(error != EEXIST);

		/*
	 * Hashlock comes after ilock in lock order
	 */
	mutex_lock(&h->qh_lock);
	if (version != h->qh_version) {
		xfs_dquot_t *tmpdqp;
		/*
		 * Now, see if somebody else put the dquot in the
		 * hashtable before us. This can happen because we didn't
		 * keep the hashchain lock. We don't have to worry about
		 * lock order between the two dquots here since dqp isn't
		 * on any findable lists yet.
		 */
		switch (xfs_qm_dqlookup(mp, id, h, &tmpdqp)) {
		case 0:
		case -1:
			/*
			 * Duplicate found, either in cache or on its way out.
			 * Just throw away the new dquot and start over.
		 * Duplicate found. Just throw away the new dquot and start
		 * over.
		 */
			if (tmpdqp)
				xfs_qm_dqput(tmpdqp);
			mutex_unlock(&h->qh_lock);
		mutex_unlock(&qi->qi_tree_lock);
		trace_xfs_dqget_dup(dqp);
		xfs_qm_dqdestroy(dqp);
		XFS_STATS_INC(xs_qm_dquot_dups);
		goto restart;
		default:
			break;
		}
	}

	/*
	 * Put the dquot at the beginning of the hash-chain and mp's list
	 * LOCK ORDER: hashlock, freelistlock, mplistlock, udqlock, gdqlock ..
	 */
	ASSERT(mutex_is_locked(&h->qh_lock));
	dqp->q_hash = h;
	list_add(&dqp->q_hashlist, &h->qh_list);
	h->qh_version++;

	/*
	 * Attach this dquot to this filesystem's list of all dquots,
	 * kept inside the mount structure in m_quotainfo field
	 */
	mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
	mutex_lock(&qi->qi_dqlist_lock);

	/*
	 * We return a locked dquot to the caller, with a reference taken
@@ -832,10 +740,11 @@ restart:
	xfs_dqlock(dqp);
	dqp->q_nrefs = 1;

	list_add(&dqp->q_mplist, &mp->m_quotainfo->qi_dqlist);
	mp->m_quotainfo->qi_dquots++;
	mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
	mutex_unlock(&h->qh_lock);
	list_add(&dqp->q_mplist, &qi->qi_dqlist);
	qi->qi_dquots++;
	mutex_unlock(&qi->qi_dqlist_lock);
	mutex_unlock(&qi->qi_tree_lock);

 dqret:
	ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL));
	trace_xfs_dqget_miss(dqp);
@@ -1117,7 +1026,6 @@ xfs_qm_dqpurge(
	struct xfs_dquot	*dqp)
{
	struct xfs_mount	*mp = dqp->q_mount;
	struct xfs_dqhash	*qh = dqp->q_hash;
	struct xfs_quotainfo	*qi = mp->m_quotainfo;

	xfs_dqlock(dqp);
@@ -1164,10 +1072,10 @@ xfs_qm_dqpurge(
	xfs_dqfunlock(dqp);
	xfs_dqunlock(dqp);

	mutex_lock(&qh->qh_lock);
	list_del_init(&dqp->q_hashlist);
	qh->qh_version++;
	mutex_unlock(&qh->qh_lock);
	mutex_lock(&qi->qi_tree_lock);
	radix_tree_delete(XFS_DQUOT_TREE(qi, dqp->q_core.d_flags),
			  be32_to_cpu(dqp->q_core.d_id));
	mutex_unlock(&qi->qi_tree_lock);

	mutex_lock(&qi->qi_dqlist_lock);
	list_del_init(&dqp->q_mplist);
+0 −12
Original line number Diff line number Diff line
@@ -29,16 +29,6 @@
 * when quotas are off.
 */

/*
 * The hash chain headers (hash buckets)
 */
typedef struct xfs_dqhash {
	struct list_head  qh_list;
	struct mutex	  qh_lock;
	uint		  qh_version;	/* ever increasing version */
	uint		  qh_nelems;	/* number of dquots on the list */
} xfs_dqhash_t;

struct xfs_mount;
struct xfs_trans;

@@ -49,8 +39,6 @@ typedef struct xfs_dquot {
	uint		 dq_flags;	/* various flags (XFS_DQ_*) */
	struct list_head q_lru;		/* global free list of dquots */
	struct list_head q_mplist;	/* mount's list of dquots */
	struct list_head q_hashlist;	/* gloabl hash list of dquots */
	xfs_dqhash_t	*q_hash;	/* the hashchain header */
	struct xfs_mount*q_mount;	/* filesystem this relates to */
	struct xfs_trans*q_transp;	/* trans this belongs to currently */
	uint		 q_nrefs;	/* # active refs from inodes */
+8 −87
Original line number Diff line number Diff line
@@ -54,9 +54,6 @@ struct xfs_qm *xfs_Gqm;
kmem_zone_t	*qm_dqzone;
kmem_zone_t	*qm_dqtrxzone;

STATIC void	xfs_qm_list_init(xfs_dqlist_t *, char *, int);
STATIC void	xfs_qm_list_destroy(xfs_dqlist_t *);

STATIC int	xfs_qm_init_quotainos(xfs_mount_t *);
STATIC int	xfs_qm_init_quotainfo(xfs_mount_t *);
STATIC int	xfs_qm_shake(struct shrinker *, struct shrink_control *);
@@ -68,37 +65,9 @@ STATIC int xfs_qm_shake(struct shrinker *, struct shrink_control *);
STATIC struct xfs_qm *
xfs_Gqm_init(void)
{
	xfs_dqhash_t	*udqhash, *gdqhash;
	xfs_qm_t	*xqm;
	size_t		hsize;
	uint		i;

	/*
	 * Initialize the dquot hash tables.
	 */
	udqhash = kmem_zalloc_greedy(&hsize,
				     XFS_QM_HASHSIZE_LOW * sizeof(xfs_dqhash_t),
				     XFS_QM_HASHSIZE_HIGH * sizeof(xfs_dqhash_t));
	if (!udqhash)
		goto out;

	gdqhash = kmem_zalloc_large(hsize);
	if (!gdqhash)
		goto out_free_udqhash;

	hsize /= sizeof(xfs_dqhash_t);

	xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP);
	xqm->qm_dqhashmask = hsize - 1;
	xqm->qm_usr_dqhtable = udqhash;
	xqm->qm_grp_dqhtable = gdqhash;
	ASSERT(xqm->qm_usr_dqhtable != NULL);
	ASSERT(xqm->qm_grp_dqhtable != NULL);

	for (i = 0; i < hsize; i++) {
		xfs_qm_list_init(&(xqm->qm_usr_dqhtable[i]), "uxdqh", i);
		xfs_qm_list_init(&(xqm->qm_grp_dqhtable[i]), "gxdqh", i);
	}

	/*
	 * dquot zone. we register our own low-memory callback.
@@ -122,11 +91,6 @@ xfs_Gqm_init(void)

	xqm->qm_nrefs = 0;
	return xqm;

 out_free_udqhash:
	kmem_free_large(udqhash);
 out:
	return NULL;
}

/*
@@ -136,22 +100,9 @@ STATIC void
xfs_qm_destroy(
	struct xfs_qm	*xqm)
{
	int		hsize, i;

	ASSERT(xqm != NULL);
	ASSERT(xqm->qm_nrefs == 0);

	hsize = xqm->qm_dqhashmask + 1;
	for (i = 0; i < hsize; i++) {
		xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i]));
		xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i]));
	}
	kmem_free_large(xqm->qm_usr_dqhtable);
	kmem_free_large(xqm->qm_grp_dqhtable);
	xqm->qm_usr_dqhtable = NULL;
	xqm->qm_grp_dqhtable = NULL;
	xqm->qm_dqhashmask = 0;

	kmem_free(xqm);
}

@@ -761,14 +712,6 @@ xfs_qm_dqdetach(
	}
}

/*
 * The hash chains and the mplist use the same xfs_dqhash structure as
 * their list head, but we can take the mplist qh_lock and one of the
 * hash qh_locks at the same time without any problem as they aren't
 * related.
 */
static struct lock_class_key xfs_quota_mplist_class;

/*
 * This initializes all the quota information that's kept in the
 * mount structure
@@ -802,9 +745,12 @@ xfs_qm_init_quotainfo(
		return error;
	}

	INIT_RADIX_TREE(&qinf->qi_uquota_tree, GFP_NOFS);
	INIT_RADIX_TREE(&qinf->qi_gquota_tree, GFP_NOFS);
	mutex_init(&qinf->qi_tree_lock);

	INIT_LIST_HEAD(&qinf->qi_dqlist);
	mutex_init(&qinf->qi_dqlist_lock);
	lockdep_set_class(&qinf->qi_dqlist_lock, &xfs_quota_mplist_class);

	INIT_LIST_HEAD(&qinf->qi_lru_list);
	qinf->qi_lru_count = 0;
@@ -924,30 +870,6 @@ xfs_qm_destroy_quotainfo(
	mp->m_quotainfo = NULL;
}



/* ------------------- PRIVATE STATIC FUNCTIONS ----------------------- */

/* ARGSUSED */
STATIC void
xfs_qm_list_init(
	xfs_dqlist_t	*list,
	char		*str,
	int		n)
{
	mutex_init(&list->qh_lock);
	INIT_LIST_HEAD(&list->qh_list);
	list->qh_version = 0;
	list->qh_nelems = 0;
}

STATIC void
xfs_qm_list_destroy(
	xfs_dqlist_t	*list)
{
	mutex_destroy(&(list->qh_lock));
}

/*
 * Create an inode and return with a reference already taken, but unlocked
 * This is how we create quota inodes
@@ -1592,10 +1514,10 @@ xfs_qm_dqfree_one(
	struct xfs_mount	*mp = dqp->q_mount;
	struct xfs_quotainfo	*qi = mp->m_quotainfo;

	mutex_lock(&dqp->q_hash->qh_lock);
	list_del_init(&dqp->q_hashlist);
	dqp->q_hash->qh_version++;
	mutex_unlock(&dqp->q_hash->qh_lock);
	mutex_lock(&qi->qi_tree_lock);
	radix_tree_delete(XFS_DQUOT_TREE(qi, dqp->q_core.d_flags),
			  be32_to_cpu(dqp->q_core.d_id));
	mutex_unlock(&qi->qi_tree_lock);

	mutex_lock(&qi->qi_dqlist_lock);
	list_del_init(&dqp->q_mplist);
@@ -1634,7 +1556,6 @@ xfs_qm_dqreclaim_one(
		return;
	}

	ASSERT(dqp->q_hash);
	ASSERT(!list_empty(&dqp->q_mplist));

	/*
+8 −11
Original line number Diff line number Diff line
@@ -30,12 +30,6 @@ extern struct xfs_qm *xfs_Gqm;
extern kmem_zone_t	*qm_dqzone;
extern kmem_zone_t	*qm_dqtrxzone;

/*
 * Dquot hashtable constants/threshold values.
 */
#define XFS_QM_HASHSIZE_LOW		(PAGE_SIZE / sizeof(xfs_dqhash_t))
#define XFS_QM_HASHSIZE_HIGH		((PAGE_SIZE * 4) / sizeof(xfs_dqhash_t))

/*
 * This defines the unit of allocation of dquots.
 * Currently, it is just one file system block, and a 4K blk contains 30
@@ -47,15 +41,10 @@ extern kmem_zone_t *qm_dqtrxzone;
 */
#define XFS_DQUOT_CLUSTER_SIZE_FSB	(xfs_filblks_t)1

typedef xfs_dqhash_t	xfs_dqlist_t;

/*
 * Quota Manager (global) structure. Lives only in core.
 */
typedef struct xfs_qm {
	xfs_dqlist_t	*qm_usr_dqhtable;/* udquot hash table */
	xfs_dqlist_t	*qm_grp_dqhtable;/* gdquot hash table */
	uint		 qm_dqhashmask;	 /* # buckets in dq hashtab - 1 */
	uint		 qm_nrefs;	 /* file systems with quota on */
	kmem_zone_t	*qm_dqzone;	 /* dquot mem-alloc zone */
	kmem_zone_t	*qm_dqtrxzone;	 /* t_dqinfo of transactions */
@@ -66,6 +55,9 @@ typedef struct xfs_qm {
 * The mount structure keeps a pointer to this.
 */
typedef struct xfs_quotainfo {
	struct radix_tree_root qi_uquota_tree;
	struct radix_tree_root qi_gquota_tree;
	struct mutex qi_tree_lock;
	xfs_inode_t	*qi_uquotaip;	 /* user quota inode */
	xfs_inode_t	*qi_gquotaip;	 /* group quota inode */
	struct list_head qi_lru_list;
@@ -94,6 +86,11 @@ typedef struct xfs_quotainfo {
	struct shrinker  qi_shrinker;
} xfs_quotainfo_t;

#define XFS_DQUOT_TREE(qi, type) \
	((type & XFS_DQ_USER) ? \
	 &((qi)->qi_uquota_tree) : \
	 &((qi)->qi_gquota_tree))


extern void	xfs_trans_mod_dquot(xfs_trans_t *, xfs_dquot_t *, uint, long);
extern int	xfs_trans_reserve_quota_bydquots(xfs_trans_t *, xfs_mount_t *,
+0 −11
Original line number Diff line number Diff line
@@ -24,17 +24,6 @@
 */
#define XFS_DQITER_MAP_SIZE	10

/*
 * Hash into a bucket in the dquot hash table, based on <mp, id>.
 */
#define XFS_DQ_HASHVAL(mp, id) (((__psunsigned_t)(mp) + \
				 (__psunsigned_t)(id)) & \
				(xfs_Gqm->qm_dqhashmask - 1))
#define XFS_DQ_HASH(mp, id, type)   (type == XFS_DQ_USER ? \
				     (xfs_Gqm->qm_usr_dqhtable + \
				      XFS_DQ_HASHVAL(mp, id)) : \
				     (xfs_Gqm->qm_grp_dqhtable + \
				      XFS_DQ_HASHVAL(mp, id)))
#define XFS_IS_DQUOT_UNINITIALIZED(dqp) ( \
	!dqp->q_core.d_blk_hardlimit && \
	!dqp->q_core.d_blk_softlimit && \
Loading