Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 92678554 authored by Christoph Hellwig's avatar Christoph Hellwig Committed by Ben Myers
Browse files

xfs: flatten the dquot lock ordering



Introduce a new XFS_DQ_FREEING flag that tells lookup and mplist walks
to skip a dquot that is being freed, and use this to avoid the trylock
on the hash and mplist locks in xfs_qm_dqreclaim_one.  Also simplify
xfs_dqpurge by moving the dquots to a dispose list after marking them
XFS_DQ_FREEING and avoid the lock ordering constraints.

Signed-off-by: default avatarChristoph Hellwig <hch@lst.de>
Reviewed-by: default avatarDave Chinner <dchinner@redhat.com>
Signed-off-by: default avatarBen Myers <bpm@sgi.com>
parent be7ffc38
Loading
Loading
Loading
Loading
+50 −63
Original line number Diff line number Diff line
@@ -728,6 +728,12 @@ xfs_qm_dqlookup(
		trace_xfs_dqlookup_found(dqp);

		xfs_dqlock(dqp);
		if (dqp->dq_flags & XFS_DQ_FREEING) {
			*O_dqpp = NULL;
			xfs_dqunlock(dqp);
			return -1;
		}

		XFS_DQHOLD(dqp);

		/*
@@ -781,11 +787,7 @@ xfs_qm_dqget(
			return (EIO);
		}
	}
#endif

 again:

#ifdef DEBUG
	ASSERT(type == XFS_DQ_USER ||
	       type == XFS_DQ_PROJ ||
	       type == XFS_DQ_GROUP);
@@ -797,13 +799,21 @@ xfs_qm_dqget(
			ASSERT(ip->i_gdquot == NULL);
	}
#endif

restart:
	mutex_lock(&h->qh_lock);

	/*
	 * Look in the cache (hashtable).
	 * The chain is kept locked during lookup.
	 */
	if (xfs_qm_dqlookup(mp, id, h, O_dqpp) == 0) {
	switch (xfs_qm_dqlookup(mp, id, h, O_dqpp)) {
	case -1:
		XQM_STATS_INC(xqmstats.xs_qm_dquot_dups);
		mutex_unlock(&h->qh_lock);
		delay(1);
		goto restart;
	case 0:
		XQM_STATS_INC(xqmstats.xs_qm_dqcachehits);
		/*
		 * The dquot was found, moved to the front of the chain,
@@ -814,9 +824,11 @@ xfs_qm_dqget(
		ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp));
		mutex_unlock(&h->qh_lock);
		trace_xfs_dqget_hit(*O_dqpp);
		return (0);	/* success */
	}
		return 0;	/* success */
	default:
		XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses);
		break;
	}

	/*
	 * Dquot cache miss. We don't want to keep the inode lock across
@@ -913,16 +925,21 @@ xfs_qm_dqget(
		 * lock order between the two dquots here since dqp isn't
		 * on any findable lists yet.
		 */
		if (xfs_qm_dqlookup(mp, id, h, &tmpdqp) == 0) {
		switch (xfs_qm_dqlookup(mp, id, h, &tmpdqp)) {
		case 0:
		case -1:
			/*
			 * Duplicate found. Just throw away the new dquot
			 * and start over.
			 * Duplicate found, either in cache or on its way out.
			 * Just throw away the new dquot and start over.
			 */
			if (tmpdqp)
				xfs_qm_dqput(tmpdqp);
			mutex_unlock(&h->qh_lock);
			xfs_qm_dqdestroy(dqp);
			XQM_STATS_INC(xqmstats.xs_qm_dquot_dups);
			goto again;
			goto restart;
		default:
			break;
		}
	}

@@ -1250,51 +1267,18 @@ xfs_dqlock2(
	}
}


/*
 * Take a dquot out of the mount's dqlist as well as the hashlist.
 * This is called via unmount as well as quotaoff, and the purge
 * will always succeed unless there are soft (temp) references
 * outstanding.
 *
 * This returns 0 if it was purged, 1 if it wasn't. It's not an error code
 * that we're returning! XXXsup - not cool.
 * Take a dquot out of the mount's dqlist as well as the hashlist.  This is
 * called via unmount as well as quotaoff, and the purge will always succeed.
 */
/* ARGSUSED */
int
void
xfs_qm_dqpurge(
	xfs_dquot_t	*dqp)
	struct xfs_dquot	*dqp)
{
	xfs_dqhash_t	*qh = dqp->q_hash;
	xfs_mount_t	*mp = dqp->q_mount;

	ASSERT(mutex_is_locked(&mp->m_quotainfo->qi_dqlist_lock));
	ASSERT(mutex_is_locked(&dqp->q_hash->qh_lock));

	/*
	 * XXX(hch): horrible locking order, will get cleaned up ASAP.
	 */
	if (!mutex_trylock(&xfs_Gqm->qm_dqfrlist_lock)) {
		mutex_unlock(&dqp->q_hash->qh_lock);
		return 1;
	}
	struct xfs_mount	*mp = dqp->q_mount;
	struct xfs_dqhash	*qh = dqp->q_hash;

	xfs_dqlock(dqp);
	/*
	 * We really can't afford to purge a dquot that is
	 * referenced, because these are hard refs.
	 * It shouldn't happen in general because we went thru _all_ inodes in
	 * dqrele_all_inodes before calling this and didn't let the mountlock go.
	 * However it is possible that we have dquots with temporary
	 * references that are not attached to an inode. e.g. see xfs_setattr().
	 */
	if (dqp->q_nrefs != 0) {
		xfs_dqunlock(dqp);
		mutex_unlock(&dqp->q_hash->qh_lock);
		return (1);
	}

	ASSERT(!list_empty(&dqp->q_freelist));

	/*
	 * If we're turning off quotas, we have to make sure that, for
@@ -1313,19 +1297,14 @@ xfs_qm_dqpurge(
	}

	/*
	 * XXXIf we're turning this type of quotas off, we don't care
	 * If we are turning this type of quotas off, we don't care
	 * about the dirty metadata sitting in this dquot. OTOH, if
	 * we're unmounting, we do care, so we flush it and wait.
	 */
	if (XFS_DQ_IS_DIRTY(dqp)) {
		int	error;

		/* dqflush unlocks dqflock */
		/*
		 * Given that dqpurge is a very rare occurrence, it is OK
		 * that we're holding the hashlist and mplist locks
		 * across the disk write. But, ... XXXsup
		 *
		 * We don't care about getting disk errors here. We need
		 * to purge this dquot anyway, so we go ahead regardless.
		 */
@@ -1335,28 +1314,36 @@ xfs_qm_dqpurge(
				__func__, dqp);
		xfs_dqflock(dqp);
	}

	ASSERT(atomic_read(&dqp->q_pincount) == 0);
	ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
	       !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL));

	xfs_dqfunlock(dqp);
	xfs_dqunlock(dqp);

	mutex_lock(&qh->qh_lock);
	list_del_init(&dqp->q_hashlist);
	qh->qh_version++;
	mutex_unlock(&qh->qh_lock);

	mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
	list_del_init(&dqp->q_mplist);
	mp->m_quotainfo->qi_dqreclaims++;
	mp->m_quotainfo->qi_dquots--;
	mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);

	/*
	 * We move dquots to the freelist as soon as their reference count
	 * hits zero, so it really should be on the freelist here.
	 */
	mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
	ASSERT(!list_empty(&dqp->q_freelist));
	list_del_init(&dqp->q_freelist);
	xfs_Gqm->qm_dqfrlist_cnt--;

	xfs_dqfunlock(dqp);
	xfs_dqunlock(dqp);

	mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
	mutex_unlock(&qh->qh_lock);

	xfs_qm_dqdestroy(dqp);
	return 0;
}

/*
+1 −1
Original line number Diff line number Diff line
@@ -133,7 +133,7 @@ static inline void xfs_dqunlock_nonotify(struct xfs_dquot *dqp)

extern void		xfs_qm_dqdestroy(xfs_dquot_t *);
extern int		xfs_qm_dqflush(xfs_dquot_t *, uint);
extern int		xfs_qm_dqpurge(xfs_dquot_t *);
extern void		xfs_qm_dqpurge(xfs_dquot_t *);
extern void		xfs_qm_dqunpin_wait(xfs_dquot_t *);
extern void		xfs_qm_adjust_dqtimers(xfs_mount_t *,
					xfs_disk_dquot_t *);
+49 −85
Original line number Diff line number Diff line
@@ -398,7 +398,8 @@ xfs_qm_dqflush_all(
	mutex_lock(&q->qi_dqlist_lock);
	list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
		xfs_dqlock(dqp);
		if (! XFS_DQ_IS_DIRTY(dqp)) {
		if ((dqp->dq_flags & XFS_DQ_FREEING) ||
		    !XFS_DQ_IS_DIRTY(dqp)) {
			xfs_dqunlock(dqp);
			continue;
		}
@@ -437,6 +438,7 @@ xfs_qm_dqflush_all(
	/* return ! busy */
	return 0;
}

/*
 * Release the group dquot pointers the user dquots may be
 * carrying around as a hint. mplist is locked on entry and exit.
@@ -453,6 +455,13 @@ xfs_qm_detach_gdquots(
	ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
	list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
		xfs_dqlock(dqp);
		if (dqp->dq_flags & XFS_DQ_FREEING) {
			xfs_dqunlock(dqp);
			mutex_unlock(&q->qi_dqlist_lock);
			delay(1);
			mutex_lock(&q->qi_dqlist_lock);
			goto again;
		}
		if ((gdqp = dqp->q_gdquot)) {
			xfs_dqlock(gdqp);
			dqp->q_gdquot = NULL;
@@ -489,8 +498,8 @@ xfs_qm_dqpurge_int(
	struct xfs_quotainfo	*q = mp->m_quotainfo;
	struct xfs_dquot	*dqp, *n;
	uint			dqtype;
	int			nrecl;
	int			nmisses;
	int			nmisses = 0;
	LIST_HEAD		(dispose_list);

	if (!q)
		return 0;
@@ -509,46 +518,26 @@ xfs_qm_dqpurge_int(
	 */
	xfs_qm_detach_gdquots(mp);

      again:
	nmisses = 0;
	ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
	/*
	 * Try to get rid of all of the unwanted dquots. The idea is to
	 * get them off mplist and hashlist, but leave them on freelist.
	 * Try to get rid of all of the unwanted dquots.
	 */
	list_for_each_entry_safe(dqp, n, &q->qi_dqlist, q_mplist) {
		xfs_dqlock(dqp);
		if ((dqp->dq_flags & dqtype) == 0) {
			xfs_dqunlock(dqp);
			continue;
		if ((dqp->dq_flags & dqtype) != 0 &&
		    !(dqp->dq_flags & XFS_DQ_FREEING)) {
			if (dqp->q_nrefs == 0) {
				dqp->dq_flags |= XFS_DQ_FREEING;
				list_move_tail(&dqp->q_mplist, &dispose_list);
			} else
				nmisses++;
		}
		xfs_dqunlock(dqp);

		if (!mutex_trylock(&dqp->q_hash->qh_lock)) {
			nrecl = q->qi_dqreclaims;
	}
	mutex_unlock(&q->qi_dqlist_lock);
			mutex_lock(&dqp->q_hash->qh_lock);
			mutex_lock(&q->qi_dqlist_lock);

			/*
			 * XXXTheoretically, we can get into a very long
			 * ping pong game here.
			 * No one can be adding dquots to the mplist at
			 * this point, but somebody might be taking things off.
			 */
			if (nrecl != q->qi_dqreclaims) {
				mutex_unlock(&dqp->q_hash->qh_lock);
				goto again;
			}
		}
	list_for_each_entry_safe(dqp, n, &dispose_list, q_mplist)
		xfs_qm_dqpurge(dqp);

		/*
		 * Take the dquot off the mplist and hashlist. It may remain on
		 * freelist in INACTIVE state.
		 */
		nmisses += xfs_qm_dqpurge(dqp);
	}
	mutex_unlock(&q->qi_dqlist_lock);
	return nmisses;
}

@@ -1667,25 +1656,16 @@ xfs_qm_init_quotainos(


/*
 * Just pop the least recently used dquot off the freelist and
 * recycle it. The returned dquot is locked.
 * Pop the least recently used dquot off the freelist and recycle it.
 */
STATIC xfs_dquot_t *
STATIC struct xfs_dquot *
xfs_qm_dqreclaim_one(void)
{
	xfs_dquot_t	*dqpout;
	xfs_dquot_t	*dqp;
	int		restarts;
	int		startagain;

	restarts = 0;
	dqpout = NULL;
	struct xfs_dquot	*dqp;
	int			restarts = 0;

	/* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */
again:
	startagain = 0;
	mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);

restart:
	list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) {
		struct xfs_mount *mp = dqp->q_mount;
		xfs_dqlock(dqp);
@@ -1701,7 +1681,6 @@ xfs_qm_dqreclaim_one(void)
			list_del_init(&dqp->q_freelist);
			xfs_Gqm->qm_dqfrlist_cnt--;
			restarts++;
			startagain = 1;
			goto dqunlock;
		}

@@ -1737,57 +1716,42 @@ xfs_qm_dqreclaim_one(void)
			}
			goto dqunlock;
		}
		xfs_dqfunlock(dqp);

		/*
		 * We're trying to get the hashlock out of order. This races
		 * with dqlookup; so, we giveup and goto the next dquot if
		 * we couldn't get the hashlock. This way, we won't starve
		 * a dqlookup process that holds the hashlock that is
		 * waiting for the freelist lock.
		 * Prevent lookup now that we are going to reclaim the dquot.
		 * Once XFS_DQ_FREEING is set lookup won't touch the dquot,
		 * thus we can drop the lock now.
		 */
		if (!mutex_trylock(&dqp->q_hash->qh_lock)) {
			restarts++;
			goto dqfunlock;
		}
		dqp->dq_flags |= XFS_DQ_FREEING;
		xfs_dqunlock(dqp);

		/*
		 * This races with dquot allocation code as well as dqflush_all
		 * and reclaim code. So, if we failed to grab the mplist lock,
		 * giveup everything and start over.
		 */
		if (!mutex_trylock(&mp->m_quotainfo->qi_dqlist_lock)) {
			restarts++;
			startagain = 1;
			goto qhunlock;
		}
		mutex_lock(&dqp->q_hash->qh_lock);
		list_del_init(&dqp->q_hashlist);
		dqp->q_hash->qh_version++;
		mutex_unlock(&dqp->q_hash->qh_lock);

		ASSERT(dqp->q_nrefs == 0);
		mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
		list_del_init(&dqp->q_mplist);
		mp->m_quotainfo->qi_dquots--;
		mp->m_quotainfo->qi_dqreclaims++;
		list_del_init(&dqp->q_hashlist);
		dqp->q_hash->qh_version++;
		mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);

		ASSERT(dqp->q_nrefs == 0);
		list_del_init(&dqp->q_freelist);
		xfs_Gqm->qm_dqfrlist_cnt--;
		dqpout = dqp;
		mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
qhunlock:
		mutex_unlock(&dqp->q_hash->qh_lock);
dqfunlock:
		xfs_dqfunlock(dqp);

		mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
		return dqp;
dqunlock:
		xfs_dqunlock(dqp);
		if (dqpout)
			break;
		if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
			break;
		if (startagain) {
			mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
			goto again;
		}
		goto restart;
	}

	mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
	return dqpout;
	return NULL;
}

/*
+3 −1
Original line number Diff line number Diff line
@@ -87,6 +87,7 @@ typedef struct xfs_dqblk {
#define XFS_DQ_PROJ		0x0002		/* project quota */
#define XFS_DQ_GROUP		0x0004		/* a group quota */
#define XFS_DQ_DIRTY		0x0008		/* dquot is dirty */
#define XFS_DQ_FREEING		0x0010		/* dquot is beeing torn down */

#define XFS_DQ_ALLTYPES		(XFS_DQ_USER|XFS_DQ_PROJ|XFS_DQ_GROUP)

@@ -94,7 +95,8 @@ typedef struct xfs_dqblk {
	{ XFS_DQ_USER,		"USER" }, \
	{ XFS_DQ_PROJ,		"PROJ" }, \
	{ XFS_DQ_GROUP,		"GROUP" }, \
	{ XFS_DQ_DIRTY,		"DIRTY" }
	{ XFS_DQ_DIRTY,		"DIRTY" }, \
	{ XFS_DQ_FREEING,	"FREEING" }

/*
 * In the worst case, when both user and group quotas are on,