Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 5cee5815 authored by Jan Kara, committed by Al Viro
Browse files

vfs: Make sys_sync() use fsync_super() (version 4)



It is unnecessarily fragile to have two places (fsync_super() and do_sync())
doing data integrity sync of the filesystem. Alter __fsync_super() to
accommodate needs of both callers and use it. So after this patch
__fsync_super() is the only place where we gather all the calls needed to
properly send all data on a filesystem to disk.

Nice bonus is that we get a complete livelock avoidance and write_supers()
is now only used for periodic writeback of superblocks.

sync_blockdevs() introduced a couple of patches ago is gone now.

[build fixes folded]

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
parent 429479f0
Loading
Loading
Loading
Loading
+10 −5
Original line number Diff line number Diff line
@@ -176,17 +176,22 @@ blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
				iov, offset, nr_segs, blkdev_get_blocks, NULL);
}

int __sync_blockdev(struct block_device *bdev, int wait)
{
	if (!bdev)
		return 0;
	if (!wait)
		return filemap_flush(bdev->bd_inode->i_mapping);
	return filemap_write_and_wait(bdev->bd_inode->i_mapping);
}

/*
 * Write out and wait upon all the dirty data associated with a block
 * device via its mapping.  Does not take the superblock lock.
 *
 * This is __sync_blockdev() with wait == 1: a NULL @bdev yields 0,
 * otherwise the result of filemap_write_and_wait() on the device's
 * mapping is returned.
 */
int sync_blockdev(struct block_device *bdev)
{
	return __sync_blockdev(bdev, 1);
}
EXPORT_SYMBOL(sync_blockdev);

+0 −49
Original line number Diff line number Diff line
@@ -678,55 +678,6 @@ void sync_inodes_sb(struct super_block *sb, int wait)
	sync_sb_inodes(sb, &wbc);
}

/**
 * sync_inodes - writes all inodes to disk
 * @wait: wait for completion
 *
 * sync_inodes() goes through each super block's dirty inode list, writes the
 * inodes out, waits on the writeout and puts the inodes back on the normal
 * list.
 *
 * This is for sys_sync().  fsync_dev() uses the same algorithm.  The subtle
 * part of the sync functions is that the blockdev "superblock" is processed
 * last.  This is because the write_inode() function of a typical fs will
 * perform no I/O, but will mark buffers in the blockdev mapping as dirty.
 * What we want to do is to perform all that dirtying first, and then write
 * back all those inode blocks via the blockdev mapping in one sweep.  So the
 * additional (somewhat redundant) sync_blockdev() calls here are to make
 * sure that really happens.  Because if we call sync_inodes_sb(wait=1) with
 * outstanding dirty inodes, the writeback goes block-at-a-time within the
 * filesystem's write_inode().  This is extremely slow.
 */
static void __sync_inodes(int wait)
{
	struct super_block *sb;

	/* sb_lock is held whenever the super_blocks list is being walked. */
	spin_lock(&sb_lock);
restart:
	list_for_each_entry(sb, &super_blocks, s_list) {
		/*
		 * Bump s_count before dropping sb_lock.
		 * NOTE(review): presumably this pins sb while the list lock
		 * is released; the matching drop looks to be inside
		 * __put_super_and_need_restart() - confirm.
		 */
		sb->s_count++;
		spin_unlock(&sb_lock);
		/* The per-superblock work runs with s_umount held for read. */
		down_read(&sb->s_umount);
		/* Superblocks without a root dentry are skipped. */
		if (sb->s_root) {
			/*
			 * Inodes first, then the backing block device, for
			 * the reason given in the sync_inodes() comment.
			 */
			sync_inodes_sb(sb, wait);
			sync_blockdev(sb->s_bdev);
		}
		up_read(&sb->s_umount);
		/* Retake the list lock before touching the list again. */
		spin_lock(&sb_lock);
		/* A non-zero result restarts the walk from the list head. */
		if (__put_super_and_need_restart(sb))
			goto restart;
	}
	spin_unlock(&sb_lock);
}

/*
 * Always run one non-waiting pass over all superblocks; when @wait is
 * non-zero, follow it with a second, waiting pass.
 */
void sync_inodes(int wait)
{
	__sync_inodes(0);

	if (!wait)
		return;

	__sync_inodes(1);
}

/**
 * write_inode_now	-	write an inode to disk
 * @inode: inode to write to disk
+7 −9
Original line number Diff line number Diff line
@@ -25,6 +25,8 @@ static inline int sb_is_blkdev_sb(struct super_block *sb)
	return sb == blockdev_superblock;
}

extern int __sync_blockdev(struct block_device *bdev, int wait);

#else
static inline void bdev_cache_init(void)
{
@@ -34,6 +36,11 @@ static inline int sb_is_blkdev_sb(struct super_block *sb)
{
	return 0;
}

/*
 * Stub variant of __sync_blockdev(): both arguments are ignored and 0 is
 * returned.
 * NOTE(review): this sits in the #else arm of a preprocessor conditional
 * whose opening #if is outside this view - presumably the !CONFIG_BLOCK
 * case; confirm.
 */
static inline int __sync_blockdev(struct block_device *bdev, int wait)
{
	return 0;
}
#endif

/*
@@ -71,12 +78,3 @@ extern void chroot_fs_refs(struct path *, struct path *);
 * file_table.c
 */
extern void mark_files_ro(struct super_block *);

/*
 * super.c
 */
#ifdef CONFIG_BLOCK
extern void sync_blockdevs(void);
#else
static inline void sync_blockdevs(void) { }
#endif
+23 −49
Original line number Diff line number Diff line
@@ -284,23 +284,23 @@ EXPORT_SYMBOL(lock_super);
EXPORT_SYMBOL(unlock_super);

/*
 * Write out and wait upon all dirty data associated with this
 * superblock.  Filesystem data as well as the underlying block
 * device.  Takes the superblock lock.  Requires a second blkdev
 * flush by the caller to complete the operation.
 * Do the filesystem syncing work. For simple filesystems sync_inodes_sb(sb, 0)
 * just dirties buffers with inodes so we have to submit IO for these buffers
 * via __sync_blockdev(). This also speeds up the wait == 1 case since in that
 * case write_inode() functions do sync_dirty_buffer() and thus effectively
 * write one block at a time.
 */
static int __fsync_super(struct super_block *sb)
static int __fsync_super(struct super_block *sb, int wait)
{
	sync_inodes_sb(sb, 0);
	vfs_dq_sync(sb);
	sync_inodes_sb(sb, 1);
	sync_inodes_sb(sb, wait);
	lock_super(sb);
	if (sb->s_dirt && sb->s_op->write_super)
		sb->s_op->write_super(sb);
	unlock_super(sb);
	if (sb->s_op->sync_fs)
		sb->s_op->sync_fs(sb, 1);
	return sync_blockdev(sb->s_bdev);
		sb->s_op->sync_fs(sb, wait);
	return __sync_blockdev(sb->s_bdev, wait);
}

/*
@@ -310,7 +310,12 @@ static int __fsync_super(struct super_block *sb)
 */
int fsync_super(struct super_block *sb)
{
	return __fsync_super(sb);
	int ret;

	ret = __fsync_super(sb, 0);
	if (ret < 0)
		return ret;
	return __fsync_super(sb, 1);
}
EXPORT_SYMBOL_GPL(fsync_super);

@@ -469,20 +474,18 @@ restart:
}

/*
 * Call the ->sync_fs super_op against all filesystems which are r/w and
 * which implement it.
 * Sync all the data for all the filesystems (called by sys_sync() and
 * emergency sync)
 *
 * This operation is careful to avoid the livelock which could easily happen
 * if two or more filesystems are being continuously dirtied.  s_need_sync_fs
 * if two or more filesystems are being continuously dirtied.  s_need_sync
 * is used only here.  We set it against all filesystems and then clear it as
 * we sync them.  So redirtied filesystems are skipped.
 *
 * But if process A is currently running sync_filesystems and then process B
 * calls sync_filesystems as well, process B will set all the s_need_sync_fs
 * calls sync_filesystems as well, process B will set all the s_need_sync
 * flags again, which will cause process A to resync everything.  Fix that with
 * a local mutex.
 *
 * (Fabian) Avoid sync_fs with clean fs & wait mode 0
 */
void sync_filesystems(int wait)
{
@@ -492,25 +495,23 @@ void sync_filesystems(int wait)
	mutex_lock(&mutex);		/* Could be down_interruptible */
	spin_lock(&sb_lock);
	list_for_each_entry(sb, &super_blocks, s_list) {
		if (!sb->s_op->sync_fs)
			continue;
		if (sb->s_flags & MS_RDONLY)
			continue;
		sb->s_need_sync_fs = 1;
		sb->s_need_sync = 1;
	}

restart:
	list_for_each_entry(sb, &super_blocks, s_list) {
		if (!sb->s_need_sync_fs)
		if (!sb->s_need_sync)
			continue;
		sb->s_need_sync_fs = 0;
		sb->s_need_sync = 0;
		if (sb->s_flags & MS_RDONLY)
			continue;	/* hm.  Was remounted r/o meanwhile */
		sb->s_count++;
		spin_unlock(&sb_lock);
		down_read(&sb->s_umount);
		if (sb->s_root)
			sb->s_op->sync_fs(sb, wait);
			__fsync_super(sb, wait);
		up_read(&sb->s_umount);
		/* restart only when sb is no longer on the list */
		spin_lock(&sb_lock);
@@ -521,33 +522,6 @@ restart:
	mutex_unlock(&mutex);
}

#ifdef CONFIG_BLOCK
/*
 * Sync all block devices underlying some superblock.
 *
 * Walks the super_blocks list and calls sync_blockdev() on s_bdev of
 * every superblock that has both a block device and a root dentry.
 */
void sync_blockdevs(void)
{
	struct super_block *sb;

	/* sb_lock is held whenever the super_blocks list is being walked. */
	spin_lock(&sb_lock);
restart:
	list_for_each_entry(sb, &super_blocks, s_list) {
		/* Nothing to do for superblocks with no block device. */
		if (!sb->s_bdev)
			continue;
		/*
		 * Bump s_count before dropping sb_lock.
		 * NOTE(review): presumably this pins sb while the list lock
		 * is released; the matching drop looks to be inside
		 * __put_super_and_need_restart() - confirm.
		 */
		sb->s_count++;
		spin_unlock(&sb_lock);
		/* The sync itself runs with s_umount held for read. */
		down_read(&sb->s_umount);
		/* Superblocks without a root dentry are skipped. */
		if (sb->s_root)
			sync_blockdev(sb->s_bdev);
		up_read(&sb->s_umount);
		/* Retake the list lock before touching the list again. */
		spin_lock(&sb_lock);
		/* A non-zero result restarts the walk from the list head. */
		if (__put_super_and_need_restart(sb))
			goto restart;
	}
	spin_unlock(&sb_lock);
}
#endif

/**
 *	get_super - get the superblock of a device
 *	@bdev: device to get the superblock for
+10 −21
Original line number Diff line number Diff line
@@ -18,35 +18,24 @@
#define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \
			SYNC_FILE_RANGE_WAIT_AFTER)

/*
 * sync everything.  Start out by waking pdflush, because that writes back
 * all queues in parallel.
 */
static void do_sync(unsigned long wait)
SYSCALL_DEFINE0(sync)
{
	wakeup_pdflush(0);
	sync_inodes(0);		/* All mappings, inodes and their blockdevs */
	vfs_dq_sync(NULL);
	sync_inodes(wait);	/* Mappings, inodes and blockdevs, again. */
	sync_supers();		/* Write the superblocks */
	sync_filesystems(0);	/* Start syncing the filesystems */
	sync_filesystems(wait);	/* Waitingly sync the filesystems */
	sync_blockdevs();
	if (!wait)
		printk("Emergency Sync complete\n");
	sync_filesystems(0);
	sync_filesystems(1);
	if (unlikely(laptop_mode))
		laptop_sync_completion();
}

SYSCALL_DEFINE0(sync)
{
	do_sync(1);
	return 0;
}

static void do_sync_work(struct work_struct *work)
{
	do_sync(0);
	/*
	 * Sync twice to reduce the possibility we skipped some inodes / pages
	 * because they were temporarily locked
	 */
	sync_filesystems(0);
	sync_filesystems(0);
	printk("Emergency Sync complete\n");
	kfree(work);
}

Loading