Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 15cf3b7a authored by Al Viro's avatar Al Viro
Browse files

Merge branch 'sb_writers_pcpu_rwsem' of...

Merge branch 'sb_writers_pcpu_rwsem' of git://git.kernel.org/pub/scm/linux/kernel/git/oleg/misc into for-next
parents 2c6625cd 8129ed29
Loading
Loading
Loading
Loading
+0 −1
Original line number Diff line number Diff line
@@ -87,7 +87,6 @@ config KPROBES_ON_FTRACE

config UPROBES
	def_bool n
	select PERCPU_RWSEM
	help
	  Uprobes is the user-space counterpart to kprobes: they
	  enable instrumentation applications (such as 'perf probe')
+2 −6
Original line number Diff line number Diff line
@@ -1638,9 +1638,7 @@ static void do_async_commit(struct work_struct *work)
	 * Tell lockdep about it.
	 */
	if (ac->newtrans->type & __TRANS_FREEZABLE)
		rwsem_acquire_read(
		     &ac->root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1],
		     0, 1, _THIS_IP_);
		__sb_writers_acquired(ac->root->fs_info->sb, SB_FREEZE_FS);

	current->journal_info = ac->newtrans;

@@ -1679,9 +1677,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
	 * async commit thread will be the one to unlock it.
	 */
	if (ac->newtrans->type & __TRANS_FREEZABLE)
		rwsem_release(
			&root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1],
			1, _THIS_IP_);
		__sb_writers_release(root->fs_info->sb, SB_FREEZE_FS);

	schedule_work(&ac->work);

+74 −97
Original line number Diff line number Diff line
@@ -135,6 +135,24 @@ static unsigned long super_cache_count(struct shrinker *shrink,
	return total_objects;
}

static void destroy_super_work(struct work_struct *work)
{
	struct super_block *s = container_of(work, struct super_block,
							destroy_work);
	int i;

	for (i = 0; i < SB_FREEZE_LEVELS; i++)
		percpu_free_rwsem(&s->s_writers.rw_sem[i]);
	kfree(s);
}

static void destroy_super_rcu(struct rcu_head *head)
{
	struct super_block *s = container_of(head, struct super_block, rcu);
	INIT_WORK(&s->destroy_work, destroy_super_work);
	schedule_work(&s->destroy_work);
}

/**
 *	destroy_super	-	frees a superblock
 *	@s: superblock to free
@@ -143,16 +161,13 @@ static unsigned long super_cache_count(struct shrinker *shrink,
 */
static void destroy_super(struct super_block *s)
{
	int i;
	list_lru_destroy(&s->s_dentry_lru);
	list_lru_destroy(&s->s_inode_lru);
	for (i = 0; i < SB_FREEZE_LEVELS; i++)
		percpu_counter_destroy(&s->s_writers.counter[i]);
	security_sb_free(s);
	WARN_ON(!list_empty(&s->s_mounts));
	kfree(s->s_subtype);
	kfree(s->s_options);
	kfree_rcu(s, rcu);
	call_rcu(&s->rcu, destroy_super_rcu);
}

/**
@@ -178,13 +193,11 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
		goto fail;

	for (i = 0; i < SB_FREEZE_LEVELS; i++) {
		if (percpu_counter_init(&s->s_writers.counter[i], 0,
					GFP_KERNEL) < 0)
		if (__percpu_init_rwsem(&s->s_writers.rw_sem[i],
					sb_writers_name[i],
					&type->s_writers_key[i]))
			goto fail;
		lockdep_init_map(&s->s_writers.lock_map[i], sb_writers_name[i],
				 &type->s_writers_key[i], 0);
	}
	init_waitqueue_head(&s->s_writers.wait);
	init_waitqueue_head(&s->s_writers.wait_unfrozen);
	s->s_bdi = &noop_backing_dev_info;
	s->s_flags = flags;
@@ -1146,72 +1159,46 @@ mount_fs(struct file_system_type *type, int flags, const char *name, void *data)
 */
void __sb_end_write(struct super_block *sb, int level)
{
	percpu_counter_dec(&sb->s_writers.counter[level-1]);
	/*
	 * Make sure s_writers are updated before we wake up waiters in
	 * freeze_super().
	 */
	smp_mb();
	if (waitqueue_active(&sb->s_writers.wait))
		wake_up(&sb->s_writers.wait);
	rwsem_release(&sb->s_writers.lock_map[level-1], 1, _RET_IP_);
	percpu_up_read(sb->s_writers.rw_sem + level-1);
}
EXPORT_SYMBOL(__sb_end_write);

#ifdef CONFIG_LOCKDEP
/*
 * We want lockdep to tell us about possible deadlocks with freezing but
 * it's it bit tricky to properly instrument it. Getting a freeze protection
 * works as getting a read lock but there are subtle problems. XFS for example
 * gets freeze protection on internal level twice in some cases, which is OK
 * only because we already hold a freeze protection also on higher level. Due
 * to these cases we have to tell lockdep we are doing trylock when we
 * already hold a freeze protection for a higher freeze level.
 */
static void acquire_freeze_lock(struct super_block *sb, int level, bool trylock,
				unsigned long ip)
{
	int i;

	if (!trylock) {
		for (i = 0; i < level - 1; i++)
			if (lock_is_held(&sb->s_writers.lock_map[i])) {
				trylock = true;
				break;
			}
	}
	rwsem_acquire_read(&sb->s_writers.lock_map[level-1], 0, trylock, ip);
}
#endif

/*
 * This is an internal function, please use sb_start_{write,pagefault,intwrite}
 * instead.
 */
int __sb_start_write(struct super_block *sb, int level, bool wait)
{
retry:
	if (unlikely(sb->s_writers.frozen >= level)) {
		if (!wait)
			return 0;
		wait_event(sb->s_writers.wait_unfrozen,
			   sb->s_writers.frozen < level);
	}
	bool force_trylock = false;
	int ret = 1;

#ifdef CONFIG_LOCKDEP
	acquire_freeze_lock(sb, level, !wait, _RET_IP_);
#endif
	percpu_counter_inc(&sb->s_writers.counter[level-1]);
	/*
	 * Make sure counter is updated before we check for frozen.
	 * freeze_super() first sets frozen and then checks the counter.
	 * We want lockdep to tell us about possible deadlocks with freezing
	 * but it's it bit tricky to properly instrument it. Getting a freeze
	 * protection works as getting a read lock but there are subtle
	 * problems. XFS for example gets freeze protection on internal level
	 * twice in some cases, which is OK only because we already hold a
	 * freeze protection also on higher level. Due to these cases we have
	 * to use wait == F (trylock mode) which must not fail.
	 */
	smp_mb();
	if (unlikely(sb->s_writers.frozen >= level)) {
		__sb_end_write(sb, level);
		goto retry;
	if (wait) {
		int i;

		for (i = 0; i < level - 1; i++)
			if (percpu_rwsem_is_held(sb->s_writers.rw_sem + i)) {
				force_trylock = true;
				break;
			}
	return 1;
	}
#endif
	if (wait && !force_trylock)
		percpu_down_read(sb->s_writers.rw_sem + level-1);
	else
		ret = percpu_down_read_trylock(sb->s_writers.rw_sem + level-1);

	WARN_ON(force_trylock & !ret);
	return ret;
}
EXPORT_SYMBOL(__sb_start_write);

@@ -1221,37 +1208,33 @@ EXPORT_SYMBOL(__sb_start_write);
 * @level: type of writers we wait for (normal vs page fault)
 *
 * This function waits until there are no writers of given type to given file
 * system. Caller of this function should make sure there can be no new writers
 * of type @level before calling this function. Otherwise this function can
 * livelock.
 * system.
 */
static void sb_wait_write(struct super_block *sb, int level)
{
	s64 writers;

	percpu_down_write(sb->s_writers.rw_sem + level-1);
	/*
	 * We just cycle-through lockdep here so that it does not complain
	 * about returning with lock to userspace
	 * We are going to return to userspace and forget about this lock, the
	 * ownership goes to the caller of thaw_super() which does unlock.
	 *
	 * FIXME: we should do this before return from freeze_super() after we
	 * called sync_filesystem(sb) and s_op->freeze_fs(sb), and thaw_super()
	 * should re-acquire these locks before s_op->unfreeze_fs(sb). However
	 * this leads to lockdep false-positives, so currently we do the early
	 * release right after acquire.
	 */
	rwsem_acquire(&sb->s_writers.lock_map[level-1], 0, 0, _THIS_IP_);
	rwsem_release(&sb->s_writers.lock_map[level-1], 1, _THIS_IP_);

	do {
		DEFINE_WAIT(wait);
	percpu_rwsem_release(sb->s_writers.rw_sem + level-1, 0, _THIS_IP_);
}

		/*
		 * We use a barrier in prepare_to_wait() to separate setting
		 * of frozen and checking of the counter
		 */
		prepare_to_wait(&sb->s_writers.wait, &wait,
				TASK_UNINTERRUPTIBLE);
static void sb_freeze_unlock(struct super_block *sb)
{
	int level;

		writers = percpu_counter_sum(&sb->s_writers.counter[level-1]);
		if (writers)
			schedule();
	for (level = 0; level < SB_FREEZE_LEVELS; ++level)
		percpu_rwsem_acquire(sb->s_writers.rw_sem + level, 0, _THIS_IP_);

		finish_wait(&sb->s_writers.wait, &wait);
	} while (writers);
	for (level = SB_FREEZE_LEVELS - 1; level >= 0; level--)
		percpu_up_write(sb->s_writers.rw_sem + level);
}

/**
@@ -1310,20 +1293,14 @@ int freeze_super(struct super_block *sb)
		return 0;
	}

	/* From now on, no new normal writers can start */
	sb->s_writers.frozen = SB_FREEZE_WRITE;
	smp_wmb();

	/* Release s_umount to preserve sb_start_write -> s_umount ordering */
	up_write(&sb->s_umount);

	sb_wait_write(sb, SB_FREEZE_WRITE);
	down_write(&sb->s_umount);

	/* Now we go and block page faults... */
	down_write(&sb->s_umount);
	sb->s_writers.frozen = SB_FREEZE_PAGEFAULT;
	smp_wmb();

	sb_wait_write(sb, SB_FREEZE_PAGEFAULT);

	/* All writers are done so after syncing there won't be dirty data */
@@ -1331,7 +1308,6 @@ int freeze_super(struct super_block *sb)

	/* Now wait for internal filesystem counter */
	sb->s_writers.frozen = SB_FREEZE_FS;
	smp_wmb();
	sb_wait_write(sb, SB_FREEZE_FS);

	if (sb->s_op->freeze_fs) {
@@ -1340,7 +1316,7 @@ int freeze_super(struct super_block *sb)
			printk(KERN_ERR
				"VFS:Filesystem freeze failed\n");
			sb->s_writers.frozen = SB_UNFROZEN;
			smp_wmb();
			sb_freeze_unlock(sb);
			wake_up(&sb->s_writers.wait_unfrozen);
			deactivate_locked_super(sb);
			return ret;
@@ -1372,8 +1348,10 @@ int thaw_super(struct super_block *sb)
		return -EINVAL;
	}

	if (sb->s_flags & MS_RDONLY)
	if (sb->s_flags & MS_RDONLY) {
		sb->s_writers.frozen = SB_UNFROZEN;
		goto out;
	}

	if (sb->s_op->unfreeze_fs) {
		error = sb->s_op->unfreeze_fs(sb);
@@ -1385,12 +1363,11 @@ int thaw_super(struct super_block *sb)
		}
	}

out:
	sb->s_writers.frozen = SB_UNFROZEN;
	smp_wmb();
	sb_freeze_unlock(sb);
out:
	wake_up(&sb->s_writers.wait_unfrozen);
	deactivate_locked_super(sb);

	return 0;
}
EXPORT_SYMBOL(thaw_super);
+2 −4
Original line number Diff line number Diff line
@@ -119,8 +119,7 @@ xfs_setfilesize_trans_alloc(
	 * We may pass freeze protection with a transaction.  So tell lockdep
	 * we released it.
	 */
	rwsem_release(&ioend->io_inode->i_sb->s_writers.lock_map[SB_FREEZE_FS-1],
		      1, _THIS_IP_);
	__sb_writers_release(ioend->io_inode->i_sb, SB_FREEZE_FS);
	/*
	 * We hand off the transaction to the completion thread now, so
	 * clear the flag here.
@@ -171,8 +170,7 @@ xfs_setfilesize_ioend(
	 * Similarly for freeze protection.
	 */
	current_set_flags_nested(&tp->t_pflags, PF_FSTRANS);
	rwsem_acquire_read(&VFS_I(ip)->i_sb->s_writers.lock_map[SB_FREEZE_FS-1],
			   0, 1, _THIS_IP_);
	__sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS);

	return xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size);
}
+11 −12
Original line number Diff line number Diff line
#ifndef _LINUX_FS_H
#define _LINUX_FS_H


#include <linux/linkage.h>
#include <linux/wait.h>
#include <linux/kdev_t.h>
@@ -30,6 +29,8 @@
#include <linux/lockdep.h>
#include <linux/percpu-rwsem.h>
#include <linux/blk_types.h>
#include <linux/workqueue.h>
#include <linux/percpu-rwsem.h>

#include <asm/byteorder.h>
#include <uapi/linux/fs.h>
@@ -1274,16 +1275,9 @@ enum {
#define SB_FREEZE_LEVELS (SB_FREEZE_COMPLETE - 1)

struct sb_writers {
	/* Counters for counting writers at each level */
	struct percpu_counter	counter[SB_FREEZE_LEVELS];
	wait_queue_head_t	wait;		/* queue for waiting for
						   writers / faults to finish */
	int				frozen;		/* Is sb frozen? */
	wait_queue_head_t	wait_unfrozen;	/* queue for waiting for
						   sb to be thawed */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	struct lockdep_map	lock_map[SB_FREEZE_LEVELS];
#endif
	wait_queue_head_t		wait_unfrozen;	/* for get_super_thawed() */
	struct percpu_rw_semaphore	rw_sem[SB_FREEZE_LEVELS];
};

struct super_block {
@@ -1375,7 +1369,7 @@ struct super_block {
	struct list_lru		s_dentry_lru ____cacheline_aligned_in_smp;
	struct list_lru		s_inode_lru ____cacheline_aligned_in_smp;
	struct rcu_head		rcu;

	struct work_struct	destroy_work;
	/*
	 * Indicates how deep in a filesystem stack this SB is
	 */
@@ -1391,6 +1385,11 @@ extern struct timespec current_fs_time(struct super_block *sb);
void __sb_end_write(struct super_block *sb, int level);
int __sb_start_write(struct super_block *sb, int level, bool wait);

#define __sb_writers_acquired(sb, lev)	\
	percpu_rwsem_acquire(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_)
#define __sb_writers_release(sb, lev)	\
	percpu_rwsem_release(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_)

/**
 * sb_end_write - drop write access to a superblock
 * @sb: the super we wrote to
Loading