Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit a3172027, authored by Chris Mason, committed by Linus Torvalds
Browse files

[PATCH] Fix reiserfs latencies caused by data=ordered



ReiserFS does periodic cleanup of old transactions in order to limit the
length of time a journal replay may take after a crash.  Sometimes, writing
metadata from an old (already committed) transaction may require committing
a newer transaction, which also requires writing all data=ordered buffers.
This can cause very long stalls on journal_begin.

This patch makes sure new transactions will not need to be committed before
trying a periodic reclaim of an old transaction.  It is low risk because if
a bad decision is made, it just means a slightly longer journal replay
after a crash.

Signed-off-by: Chris Mason <mason@suse.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 25736b1c
Loading
Loading
Loading
Loading
+43 −11
Original line number Diff line number Diff line
@@ -1186,6 +1186,21 @@ static struct reiserfs_journal_list *find_newer_jl_for_cn(struct
	return NULL;
}

/*
 * Walk the hprev chain starting after @cn and look for another cnode that
 * refers to the same block on the same super block.
 *
 * Returns 0 as soon as such a cnode is found whose journal list still has a
 * non-zero j_commit_left, and 1 when the chain is exhausted without finding
 * one.
 *
 * NOTE(review): going by the function name, the hprev entries presumably
 * belong to newer journal lists and a non-zero j_commit_left presumably
 * means that list's commit has not finished -- confirm against the hash
 * insertion code.
 */
static int newer_jl_done(struct reiserfs_journal_cnode *cn)
{
	struct super_block *owner = cn->sb;
	b_blocknr_t target = cn->blocknr;
	struct reiserfs_journal_cnode *walk;

	for (walk = cn->hprev; walk; walk = walk->hprev) {
		/* only entries for the very same block are of interest */
		if (walk->sb != owner || walk->blocknr != target)
			continue;
		/* an entry without a journal list cannot block us */
		if (!walk->jlist)
			continue;
		if (atomic_read(&walk->jlist->j_commit_left) != 0)
			return 0;
	}
	return 1;
}

static void remove_journal_hash(struct super_block *,
				struct reiserfs_journal_cnode **,
				struct reiserfs_journal_list *, unsigned long,
@@ -1604,6 +1619,31 @@ static int flush_journal_list(struct super_block *s,
	return err;
}

/*
 * test_transaction - check whether every live block of a journal list
 * passes newer_jl_done().
 *
 * @s:  super block of the filesystem (not read by this function)
 * @jl: journal list to examine
 *
 * Returns 1 when @jl is empty (j_len == 0 or j_nonzerolen == 0), or when
 * every cnode on jl->j_realblock that is still hashed (blocknr != 0) and
 * has a buffer attached passes newer_jl_done().  Returns 0 as soon as one
 * cnode fails that check.
 *
 * The walk previously ended in "return 0", so any non-empty list was
 * rejected no matter what the per-cnode checks reported and the loop had
 * no influence on the result.
 */
static int test_transaction(struct super_block *s,
			    struct reiserfs_journal_list *jl)
{
	struct reiserfs_journal_cnode *cn;

	if (jl->j_len == 0 || atomic_read(&jl->j_nonzerolen) == 0)
		return 1;

	for (cn = jl->j_realblock; cn; cn = cn->next) {
		/*
		 * blocknr == 0 means the cnode has been cleared from the
		 * hash; skip it.  Entries without a buffer are skipped too.
		 */
		if (cn->blocknr != 0 && cn->bh && !newer_jl_done(cn))
			return 0;
		/* the list can be long, give other tasks a chance to run */
		cond_resched();
	}

	/* no cnode failed the check */
	return 1;
}

static int write_one_transaction(struct super_block *s,
				 struct reiserfs_journal_list *jl,
				 struct buffer_chunk *chunk)
@@ -3433,16 +3473,6 @@ static void flush_async_commits(void *p)
		flush_commit_list(p_s_sb, jl, 1);
	}
	unlock_kernel();
	/*
	 * this is a little racey, but there's no harm in missing
	 * the filemap_fdata_write
	 */
	if (!atomic_read(&journal->j_async_throttle)
	    && !reiserfs_is_journal_aborted(journal)) {
		atomic_inc(&journal->j_async_throttle);
		filemap_fdatawrite(p_s_sb->s_bdev->bd_inode->i_mapping);
		atomic_dec(&journal->j_async_throttle);
	}
}

/*
@@ -3844,7 +3874,9 @@ static void flush_old_journal_lists(struct super_block *s)
		entry = journal->j_journal_list.next;
		jl = JOURNAL_LIST_ENTRY(entry);
		/* this check should always be run, to send old lists to disk */
		if (jl->j_timestamp < (now - (JOURNAL_MAX_TRANS_AGE * 4))) {
		if (jl->j_timestamp < (now - (JOURNAL_MAX_TRANS_AGE * 4)) &&
		    atomic_read(&jl->j_commit_left) == 0 &&
		    test_transaction(s, jl)) {
			flush_used_journal_lists(s, jl);
		} else {
			break;