Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 618f0636 authored by Kirill Korotaev, committed by Linus Torvalds
Browse files

[PATCH] O(1) sb list traversing on syncs



This patch removes the O(n^2) super block loops in sync_inodes(),
sync_filesystems() etc. in favour of using __put_super_and_need_restart(),
which I introduced earlier.  We faced noticeably long freezes on sb
syncing when there are thousands of super blocks in the system.

Signed-Off-By: Kirill Korotaev <dev@sw.ru>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 4fea2838
Loading
Loading
Loading
Loading
+27 −37
Original line number Diff line number Diff line
@@ -485,32 +485,6 @@ static void set_sb_syncing(int val)
	spin_unlock(&sb_lock);
}

/*
 * Pick the next superblock that has not yet been marked s_syncing.
 *
 * The super_blocks list is walked from its tail towards its head under
 * sb_lock.  The first entry whose s_syncing flag is clear gets the flag
 * set and its s_count bumped; sb_lock is then dropped and s_umount is
 * taken for reading.  If that superblock has no s_root it is released
 * with drop_super() and the whole scan starts over from the tail.
 *
 * Returns the chosen superblock with s_umount read-held, or NULL once
 * every entry on the list is already marked s_syncing.
 */
static struct super_block *get_super_to_sync(void)
{
	struct super_block *sb;

	for (;;) {
		spin_lock(&sb_lock);

		/* Skip backwards over everything already marked. */
		sb = sb_entry(super_blocks.prev);
		while (sb != sb_entry(&super_blocks) && sb->s_syncing)
			sb = sb_entry(sb->s_list.prev);

		/* Reached the list head: nothing left to hand out. */
		if (sb == sb_entry(&super_blocks)) {
			spin_unlock(&sb_lock);
			return NULL;
		}

		sb->s_syncing = 1;
		sb->s_count++;
		spin_unlock(&sb_lock);

		down_read(&sb->s_umount);
		if (sb->s_root)
			return sb;

		/* No root on this one: let go of it and rescan. */
		drop_super(sb);
	}
}

/**
 * sync_inodes - writes all inodes to disk
 * @wait: wait for completion
@@ -530,23 +504,39 @@ restart:
 * outstanding dirty inodes, the writeback goes block-at-a-time within the
 * filesystem's write_inode().  This is extremely slow.
 */
/*
 * NOTE(review): these lines come from a diff view whose +/- markers were
 * lost, so the removed sync_inodes() body and the added
 * __sync_inodes()/sync_inodes() pair are interleaved here (two signatures
 * in a row, unbalanced braces).  Read this as a diff, not as compilable C;
 * which line belongs to which side should be confirmed against the real
 * commit.
 */
void sync_inodes(int wait)
static void __sync_inodes(int wait)
{
	struct super_block *sb;

	set_sb_syncing(0);
	while ((sb = get_super_to_sync()) != NULL) {
		sync_inodes_sb(sb, 0);
	/* The walk over super_blocks below runs with sb_lock held. */
	spin_lock(&sb_lock);
restart:
	list_for_each_entry(sb, &super_blocks, s_list) {
		/* Entries already flagged s_syncing are skipped. */
		if (sb->s_syncing)
			continue;
		sb->s_syncing = 1;
		/* s_count is bumped before sb_lock is released. */
		sb->s_count++;
		spin_unlock(&sb_lock);
		down_read(&sb->s_umount);
		if (sb->s_root) {
			sync_inodes_sb(sb, wait);
			sync_blockdev(sb->s_bdev);
		drop_super(sb);
		}
		up_read(&sb->s_umount);
		/*
		 * sb_lock is retaken before the put; the list walk starts
		 * over only when __put_super_and_need_restart() says so.
		 */
		spin_lock(&sb_lock);
		if (__put_super_and_need_restart(sb))
			goto restart;
	}
	spin_unlock(&sb_lock);
}

void sync_inodes(int wait)
{
	/* First pass: clear the s_syncing marks, then sync with wait == 0. */
	set_sb_syncing(0);
	__sync_inodes(0);

	if (wait) {
		/* Second pass: clear the marks again and sync with wait == 1. */
		set_sb_syncing(0);
		while ((sb = get_super_to_sync()) != NULL) {
			sync_inodes_sb(sb, 1);
			sync_blockdev(sb->s_bdev);
			drop_super(sb);
		}
		__sync_inodes(1);
	}
}

+24 −36
Original line number Diff line number Diff line
@@ -149,36 +149,6 @@ static int check_quotactl_valid(struct super_block *sb, int type, int cmd, qid_t
	return error;
}

static struct super_block *get_super_to_sync(int type)
{
	struct list_head *head;
	int cnt, dirty;

restart:
	spin_lock(&sb_lock);
	list_for_each(head, &super_blocks) {
		struct super_block *sb = list_entry(head, struct super_block, s_list);

		/* This test just improves performance so it needn't be reliable... */
		for (cnt = 0, dirty = 0; cnt < MAXQUOTAS; cnt++)
			if ((type == cnt || type == -1) && sb_has_quota_enabled(sb, cnt)
			    && info_any_dirty(&sb_dqopt(sb)->info[cnt]))
				dirty = 1;
		if (!dirty)
			continue;
		sb->s_count++;
		spin_unlock(&sb_lock);
		down_read(&sb->s_umount);
		if (!sb->s_root) {
			drop_super(sb);
			goto restart;
		}
		return sb;
	}
	spin_unlock(&sb_lock);
	return NULL;
}

static void quota_sync_sb(struct super_block *sb, int type)
{
	int cnt;
@@ -219,17 +189,35 @@ static void quota_sync_sb(struct super_block *sb, int type)

/*
 * NOTE(review): these lines come from a diff view whose +/- markers were
 * lost, so the removed and the added bodies of sync_dquots() are
 * interleaved (dangling "else {" / "while" from the old version, unbalanced
 * braces).  Read this as a diff, not as compilable C; which line belongs to
 * which side should be confirmed against the real commit.
 */
void sync_dquots(struct super_block *sb, int type)
{
	int cnt, dirty;

	/* A specific superblock was passed in: sync just that one. */
	if (sb) {
		if (sb->s_qcop->quota_sync)
			quota_sync_sb(sb, type);
		return;
	}
	else {
		while ((sb = get_super_to_sync(type)) != NULL) {
			if (sb->s_qcop->quota_sync)

	/* Otherwise walk every entry on super_blocks under sb_lock. */
	spin_lock(&sb_lock);
restart:
	list_for_each_entry(sb, &super_blocks, s_list) {
		/* This test just improves performance so it needn't be reliable... */
		for (cnt = 0, dirty = 0; cnt < MAXQUOTAS; cnt++)
			if ((type == cnt || type == -1) && sb_has_quota_enabled(sb, cnt)
			    && info_any_dirty(&sb_dqopt(sb)->info[cnt]))
				dirty = 1;
		if (!dirty)
			continue;
		/* s_count is bumped before sb_lock is released. */
		sb->s_count++;
		spin_unlock(&sb_lock);
		down_read(&sb->s_umount);
		if (sb->s_root && sb->s_qcop->quota_sync)
			quota_sync_sb(sb, type);
			drop_super(sb);
		}
		up_read(&sb->s_umount);
		/*
		 * sb_lock is retaken before the put; the list walk starts
		 * over only when __put_super_and_need_restart() says so.
		 */
		spin_lock(&sb_lock);
		if (__put_super_and_need_restart(sb))
			goto restart;
	}
	spin_unlock(&sb_lock);
}

/* Copy parameters and call proper function */
+45 −38
Original line number Diff line number Diff line
@@ -342,19 +342,21 @@ static inline void write_super(struct super_block *sb)
/*
 * NOTE(review): these lines come from a diff view whose +/- markers were
 * lost, so the removed and the added bodies of sync_supers() are
 * interleaved (the "restart:" label appears twice, braces do not balance).
 * Read this as a diff, not as compilable C; which line belongs to which
 * side should be confirmed against the real commit.
 */
void sync_supers(void)
{
	struct super_block *sb;
restart:

	spin_lock(&sb_lock);
	sb = sb_entry(super_blocks.next);
	while (sb != sb_entry(&super_blocks))
restart:
	list_for_each_entry(sb, &super_blocks, s_list) {
		/* Only superblocks with s_dirt set are written. */
		if (sb->s_dirt) {
			/* s_count is bumped before sb_lock is released. */
			sb->s_count++;
			spin_unlock(&sb_lock);
			down_read(&sb->s_umount);
			write_super(sb);
			drop_super(sb);
			up_read(&sb->s_umount);
			/*
			 * sb_lock is retaken before the put; the list walk
			 * starts over only when
			 * __put_super_and_need_restart() says so.
			 */
			spin_lock(&sb_lock);
			if (__put_super_and_need_restart(sb))
				goto restart;
		} else
			sb = sb_entry(sb->s_list.next);
		}
	}
	spin_unlock(&sb_lock);
}

@@ -381,20 +383,16 @@ void sync_filesystems(int wait)

	down(&mutex);		/* Could be down_interruptible */
	spin_lock(&sb_lock);
	for (sb = sb_entry(super_blocks.next); sb != sb_entry(&super_blocks);
			sb = sb_entry(sb->s_list.next)) {
	list_for_each_entry(sb, &super_blocks, s_list) {
		if (!sb->s_op->sync_fs)
			continue;
		if (sb->s_flags & MS_RDONLY)
			continue;
		sb->s_need_sync_fs = 1;
	}
	spin_unlock(&sb_lock);

restart:
	spin_lock(&sb_lock);
	for (sb = sb_entry(super_blocks.next); sb != sb_entry(&super_blocks);
			sb = sb_entry(sb->s_list.next)) {
	list_for_each_entry(sb, &super_blocks, s_list) {
		if (!sb->s_need_sync_fs)
			continue;
		sb->s_need_sync_fs = 0;
@@ -405,7 +403,10 @@ restart:
		down_read(&sb->s_umount);
		if (sb->s_root && (wait || sb->s_dirt))
			sb->s_op->sync_fs(sb, wait);
		drop_super(sb);
		up_read(&sb->s_umount);
		/* restart only when sb is no longer on the list */
		spin_lock(&sb_lock);
		if (__put_super_and_need_restart(sb))
			goto restart;
	}
	spin_unlock(&sb_lock);
@@ -422,20 +423,24 @@ restart:

struct super_block * get_super(struct block_device *bdev)
{
	struct list_head *p;
	struct super_block *sb;

	if (!bdev)
		return NULL;
rescan:

	spin_lock(&sb_lock);
	list_for_each(p, &super_blocks) {
		struct super_block *s = sb_entry(p);
		if (s->s_bdev == bdev) {
			s->s_count++;
rescan:
	list_for_each_entry(sb, &super_blocks, s_list) {
		if (sb->s_bdev == bdev) {
			sb->s_count++;
			spin_unlock(&sb_lock);
			down_read(&s->s_umount);
			if (s->s_root)
				return s;
			drop_super(s);
			down_read(&sb->s_umount);
			if (sb->s_root)
				return sb;
			up_read(&sb->s_umount);
			/* restart only when sb is no longer on the list */
			spin_lock(&sb_lock);
			if (__put_super_and_need_restart(sb))
				goto rescan;
		}
	}
@@ -447,19 +452,21 @@ EXPORT_SYMBOL(get_super);
 
struct super_block * user_get_super(dev_t dev)
{
	struct list_head *p;
	struct super_block *sb;

rescan:
	spin_lock(&sb_lock);
	list_for_each(p, &super_blocks) {
		struct super_block *s = sb_entry(p);
		if (s->s_dev ==  dev) {
			s->s_count++;
rescan:
	list_for_each_entry(sb, &super_blocks, s_list) {
		if (sb->s_dev ==  dev) {
			sb->s_count++;
			spin_unlock(&sb_lock);
			down_read(&s->s_umount);
			if (s->s_root)
				return s;
			drop_super(s);
			down_read(&sb->s_umount);
			if (sb->s_root)
				return sb;
			up_read(&sb->s_umount);
			/* restart only when sb is no longer on the list */
			spin_lock(&sb_lock);
			if (__put_super_and_need_restart(sb))
				goto rescan;
		}
	}