Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit b9ba6f94, authored by Niu Yawei, committed by Jan Kara
Browse files

quota: remove dqptr_sem



Remove dqptr_sem to make the quota code scalable: accesses to
inode->i_dquot are now protected by dquot_srcu, and changes to
inode->i_dquot are now serialized by dq_data_lock.

Signed-off-by: Lai Siyao <lai.siyao@intel.com>
Signed-off-by: Niu Yawei <yawei.niu@intel.com>
Signed-off-by: Jan Kara <jack@suse.cz>
parent 9eb6463f
Loading
Loading
Loading
Loading
+49 −65
Original line number Diff line number Diff line
@@ -96,13 +96,16 @@
 * Note that some things (eg. sb pointer, type, id) doesn't change during
 * the life of the dquot structure and so needn't to be protected by a lock
 *
 * Any operation working on dquots via inode pointers must hold dqptr_sem.  If
 * operation is just reading pointers from inode (or not using them at all) the
 * read lock is enough. If pointers are altered function must hold write lock.
 * Operation accessing dquots via inode pointers are protected by dquot_srcu.
 * Operation of reading pointer needs srcu_read_lock(&dquot_srcu), and
 * synchronize_srcu(&dquot_srcu) is called after clearing pointers from
 * inode and before dropping dquot references to avoid use of dquots after
 * they are freed. dq_data_lock is used to serialize the pointer setting and
 * clearing operations.
 * Special care needs to be taken about S_NOQUOTA inode flag (marking that
 * inode is a quota file). Functions adding pointers from inode to dquots have
 * to check this flag under dqptr_sem and then (if S_NOQUOTA is not set) they
 * have to do all pointer modifications before dropping dqptr_sem. This makes
 * to check this flag under dq_data_lock and then (if S_NOQUOTA is not set) they
 * have to do all pointer modifications before dropping dq_data_lock. This makes
 * sure they cannot race with quotaon which first sets S_NOQUOTA flag and
 * then drops all pointers to dquots from an inode.
 *
@@ -116,21 +119,15 @@
 * spinlock to internal buffers before writing.
 *
 * Lock ordering (including related VFS locks) is the following:
 *   dqonoff_mutex > i_mutex > journal_lock > dqptr_sem > dquot->dq_lock >
 *   dqio_mutex
 *   dqonoff_mutex > i_mutex > journal_lock > dquot->dq_lock > dqio_mutex
 * dqonoff_mutex > i_mutex comes from dquot_quota_sync, dquot_enable, etc.
 * The lock ordering of dqptr_sem imposed by quota code is only dqonoff_sem >
 * dqptr_sem. But filesystem has to count with the fact that functions such as
 * dquot_alloc_space() acquire dqptr_sem and they usually have to be called
 * from inside a transaction to keep filesystem consistency after a crash. Also
 * filesystems usually want to do some IO on dquot from ->mark_dirty which is
 * called with dqptr_sem held.
 */

static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_list_lock);
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_state_lock);
__cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_data_lock);
EXPORT_SYMBOL(dq_data_lock);
DEFINE_STATIC_SRCU(dquot_srcu);

void __quota_error(struct super_block *sb, const char *func,
		   const char *fmt, ...)
@@ -964,7 +961,6 @@ static void add_dquot_ref(struct super_block *sb, int type)
/*
 * Remove references to dquots from inode and add dquot to list for freeing
 * if we have the last reference to dquot
 * We can't race with anybody because we hold dqptr_sem for writing...
 */
static void remove_inode_dquot_ref(struct inode *inode, int type,
				   struct list_head *tofree_head)
@@ -1024,13 +1020,15 @@ static void remove_dquot_ref(struct super_block *sb, int type,
		 *  We have to scan also I_NEW inodes because they can already
		 *  have quota pointer initialized. Luckily, we need to touch
		 *  only quota pointers and these have separate locking
		 *  (dqptr_sem).
		 *  (dq_data_lock).
		 */
		spin_lock(&dq_data_lock);
		if (!IS_NOQUOTA(inode)) {
			if (unlikely(inode_get_rsv_space(inode) > 0))
				reserved = 1;
			remove_inode_dquot_ref(inode, type, tofree_head);
		}
		spin_unlock(&dq_data_lock);
	}
	spin_unlock(&inode_sb_list_lock);
#ifdef CONFIG_QUOTA_DEBUG
@@ -1048,9 +1046,8 @@ static void drop_dquot_ref(struct super_block *sb, int type)
	LIST_HEAD(tofree_head);

	if (sb->dq_op) {
		down_write(&sb_dqopt(sb)->dqptr_sem);
		remove_dquot_ref(sb, type, &tofree_head);
		up_write(&sb_dqopt(sb)->dqptr_sem);
		synchronize_srcu(&dquot_srcu);
		put_dquot_list(&tofree_head);
	}
}
@@ -1381,9 +1378,6 @@ static int dquot_active(const struct inode *inode)
/*
 * Initialize quota pointers in inode
 *
 * We do things in a bit complicated way but by that we avoid calling
 * dqget() and thus filesystem callbacks under dqptr_sem.
 *
 * It is better to call this function outside of any transaction as it
 * might need a lot of space in journal for dquot structure allocation.
 */
@@ -1394,8 +1388,6 @@ static void __dquot_initialize(struct inode *inode, int type)
	struct super_block *sb = inode->i_sb;
	qsize_t rsv;

	/* First test before acquiring mutex - solves deadlocks when we
         * re-enter the quota code and are already holding the mutex */
	if (!dquot_active(inode))
		return;

@@ -1429,7 +1421,7 @@ static void __dquot_initialize(struct inode *inode, int type)
	if (!init_needed)
		return;

	down_write(&sb_dqopt(sb)->dqptr_sem);
	spin_lock(&dq_data_lock);
	if (IS_NOQUOTA(inode))
		goto out_err;
	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -1449,15 +1441,12 @@ static void __dquot_initialize(struct inode *inode, int type)
			 * did a write before quota was turned on
			 */
			rsv = inode_get_rsv_space(inode);
			if (unlikely(rsv)) {
				spin_lock(&dq_data_lock);
			if (unlikely(rsv))
				dquot_resv_space(inode->i_dquot[cnt], rsv);
				spin_unlock(&dq_data_lock);
			}
		}
	}
out_err:
	up_write(&sb_dqopt(sb)->dqptr_sem);
	spin_unlock(&dq_data_lock);
	/* Drop unused references */
	dqput_all(got);
}
@@ -1469,19 +1458,24 @@ void dquot_initialize(struct inode *inode)
EXPORT_SYMBOL(dquot_initialize);

/*
 * 	Release all quotas referenced by inode
 * Release all quotas referenced by inode.
 *
 * This function only be called on inode free or converting
 * a file to quota file, no other users for the i_dquot in
 * both cases, so we needn't call synchronize_srcu() after
 * clearing i_dquot.
 */
static void __dquot_drop(struct inode *inode)
{
	int cnt;
	struct dquot *put[MAXQUOTAS];

	down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
	spin_lock(&dq_data_lock);
	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
		put[cnt] = inode->i_dquot[cnt];
		inode->i_dquot[cnt] = NULL;
	}
	up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
	spin_unlock(&dq_data_lock);
	dqput_all(put);
}

@@ -1599,15 +1593,11 @@ static void inode_decr_space(struct inode *inode, qsize_t number, int reserve)
 */
int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
{
	int cnt, ret = 0;
	int cnt, ret = 0, index;
	struct dquot_warn warn[MAXQUOTAS];
	struct dquot **dquots = inode->i_dquot;
	int reserve = flags & DQUOT_SPACE_RESERVE;

	/*
	 * First test before acquiring mutex - solves deadlocks when we
	 * re-enter the quota code and are already holding the mutex
	 */
	if (!dquot_active(inode)) {
		inode_incr_space(inode, number, reserve);
		goto out;
@@ -1616,7 +1606,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
		warn[cnt].w_type = QUOTA_NL_NOWARN;

	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
	index = srcu_read_lock(&dquot_srcu);
	spin_lock(&dq_data_lock);
	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
		if (!dquots[cnt])
@@ -1643,7 +1633,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
		goto out_flush_warn;
	mark_all_dquot_dirty(dquots);
out_flush_warn:
	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
	srcu_read_unlock(&dquot_srcu, index);
	flush_warnings(warn);
out:
	return ret;
@@ -1655,17 +1645,16 @@ EXPORT_SYMBOL(__dquot_alloc_space);
 */
int dquot_alloc_inode(const struct inode *inode)
{
	int cnt, ret = 0;
	int cnt, ret = 0, index;
	struct dquot_warn warn[MAXQUOTAS];
	struct dquot * const *dquots = inode->i_dquot;

	/* First test before acquiring mutex - solves deadlocks when we
         * re-enter the quota code and are already holding the mutex */
	if (!dquot_active(inode))
		return 0;
	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
		warn[cnt].w_type = QUOTA_NL_NOWARN;
	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);

	index = srcu_read_lock(&dquot_srcu);
	spin_lock(&dq_data_lock);
	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
		if (!dquots[cnt])
@@ -1685,7 +1674,7 @@ int dquot_alloc_inode(const struct inode *inode)
	spin_unlock(&dq_data_lock);
	if (ret == 0)
		mark_all_dquot_dirty(dquots);
	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
	srcu_read_unlock(&dquot_srcu, index);
	flush_warnings(warn);
	return ret;
}
@@ -1696,14 +1685,14 @@ EXPORT_SYMBOL(dquot_alloc_inode);
 */
int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
{
	int cnt;
	int cnt, index;

	if (!dquot_active(inode)) {
		inode_claim_rsv_space(inode, number);
		return 0;
	}

	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
	index = srcu_read_lock(&dquot_srcu);
	spin_lock(&dq_data_lock);
	/* Claim reserved quotas to allocated quotas */
	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -1715,7 +1704,7 @@ int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
	inode_claim_rsv_space(inode, number);
	spin_unlock(&dq_data_lock);
	mark_all_dquot_dirty(inode->i_dquot);
	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
	srcu_read_unlock(&dquot_srcu, index);
	return 0;
}
EXPORT_SYMBOL(dquot_claim_space_nodirty);
@@ -1725,14 +1714,14 @@ EXPORT_SYMBOL(dquot_claim_space_nodirty);
 */
void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number)
{
	int cnt;
	int cnt, index;

	if (!dquot_active(inode)) {
		inode_reclaim_rsv_space(inode, number);
		return;
	}

	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
	index = srcu_read_lock(&dquot_srcu);
	spin_lock(&dq_data_lock);
	/* Claim reserved quotas to allocated quotas */
	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -1744,7 +1733,7 @@ void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number)
	inode_reclaim_rsv_space(inode, number);
	spin_unlock(&dq_data_lock);
	mark_all_dquot_dirty(inode->i_dquot);
	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
	srcu_read_unlock(&dquot_srcu, index);
	return;
}
EXPORT_SYMBOL(dquot_reclaim_space_nodirty);
@@ -1757,16 +1746,14 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags)
	unsigned int cnt;
	struct dquot_warn warn[MAXQUOTAS];
	struct dquot **dquots = inode->i_dquot;
	int reserve = flags & DQUOT_SPACE_RESERVE;
	int reserve = flags & DQUOT_SPACE_RESERVE, index;

	/* First test before acquiring mutex - solves deadlocks when we
         * re-enter the quota code and are already holding the mutex */
	if (!dquot_active(inode)) {
		inode_decr_space(inode, number, reserve);
		return;
	}

	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
	index = srcu_read_lock(&dquot_srcu);
	spin_lock(&dq_data_lock);
	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
		int wtype;
@@ -1789,7 +1776,7 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags)
		goto out_unlock;
	mark_all_dquot_dirty(dquots);
out_unlock:
	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
	srcu_read_unlock(&dquot_srcu, index);
	flush_warnings(warn);
}
EXPORT_SYMBOL(__dquot_free_space);
@@ -1802,13 +1789,12 @@ void dquot_free_inode(const struct inode *inode)
	unsigned int cnt;
	struct dquot_warn warn[MAXQUOTAS];
	struct dquot * const *dquots = inode->i_dquot;
	int index;

	/* First test before acquiring mutex - solves deadlocks when we
         * re-enter the quota code and are already holding the mutex */
	if (!dquot_active(inode))
		return;

	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
	index = srcu_read_lock(&dquot_srcu);
	spin_lock(&dq_data_lock);
	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
		int wtype;
@@ -1823,7 +1809,7 @@ void dquot_free_inode(const struct inode *inode)
	}
	spin_unlock(&dq_data_lock);
	mark_all_dquot_dirty(dquots);
	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
	srcu_read_unlock(&dquot_srcu, index);
	flush_warnings(warn);
}
EXPORT_SYMBOL(dquot_free_inode);
@@ -1837,6 +1823,8 @@ EXPORT_SYMBOL(dquot_free_inode);
 * This operation can block, but only after everything is updated
 * A transaction must be started when entering this function.
 *
 * We are holding reference on transfer_from & transfer_to, no need to
 * protect them by srcu_read_lock().
 */
int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
{
@@ -1849,8 +1837,6 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
	struct dquot_warn warn_from_inodes[MAXQUOTAS];
	struct dquot_warn warn_from_space[MAXQUOTAS];

	/* First test before acquiring mutex - solves deadlocks when we
         * re-enter the quota code and are already holding the mutex */
	if (IS_NOQUOTA(inode))
		return 0;
	/* Initialize the arrays */
@@ -1859,12 +1845,12 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
		warn_from_inodes[cnt].w_type = QUOTA_NL_NOWARN;
		warn_from_space[cnt].w_type = QUOTA_NL_NOWARN;
	}
	down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);

	spin_lock(&dq_data_lock);
	if (IS_NOQUOTA(inode)) {	/* File without quota accounting? */
		up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
		spin_unlock(&dq_data_lock);
		return 0;
	}
	spin_lock(&dq_data_lock);
	cur_space = inode_get_bytes(inode);
	rsv_space = inode_get_rsv_space(inode);
	space = cur_space + rsv_space;
@@ -1918,7 +1904,6 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
		inode->i_dquot[cnt] = transfer_to[cnt];
	}
	spin_unlock(&dq_data_lock);
	up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);

	mark_all_dquot_dirty(transfer_from);
	mark_all_dquot_dirty(transfer_to);
@@ -1932,7 +1917,6 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
	return 0;
over_quota:
	spin_unlock(&dq_data_lock);
	up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
	flush_warnings(warn_to);
	return ret;
}
+0 −1
Original line number Diff line number Diff line
@@ -218,7 +218,6 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
	lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key);
	mutex_init(&s->s_dquot.dqio_mutex);
	mutex_init(&s->s_dquot.dqonoff_mutex);
	init_rwsem(&s->s_dquot.dqptr_sem);
	s->s_maxbytes = MAX_NON_LFS;
	s->s_op = &default_op;
	s->s_time_gran = 1000000000;
+0 −1
Original line number Diff line number Diff line
@@ -390,7 +390,6 @@ struct quota_info {
	unsigned int flags;			/* Flags for diskquotas on this device */
	struct mutex dqio_mutex;		/* lock device while I/O in progress */
	struct mutex dqonoff_mutex;		/* Serialize quotaon & quotaoff */
	struct rw_semaphore dqptr_sem;		/* serialize ops using quota_info struct, pointers from inode to dquots */
	struct inode *files[MAXQUOTAS];		/* inodes of quotafiles */
	struct mem_dqinfo info[MAXQUOTAS];	/* Information for each quota type */
	const struct quota_format_ops *ops[MAXQUOTAS];	/* Operations for each type */