Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit b34090e5 authored by Jan Kara's avatar Jan Kara Committed by Theodore Ts'o
Browse files

jbd2: refine waiting for shadow buffers



Currently when we add a buffer to a transaction, we wait until the
buffer is removed from BJ_Shadow list (so that we prevent any changes
to the buffer that is just written to the journal).  This can take
unnecessarily long as a lot happens between the time the buffer is
submitted to the journal and the time when we remove the buffer from
BJ_Shadow list.  (e.g.  We wait for all data buffers in the
transaction, we issue a cache flush, etc.)  Also this creates a
dependency of do_get_write_access() on transaction commit (namely
waiting for data IO to complete) which we want to avoid when
implementing transaction reservation.

So we modify commit code to set new BH_Shadow flag when temporary
shadowing buffer is created and we clear that flag once IO on that
buffer is complete.  This allows do_get_write_access() to wait only
for BH_Shadow bit and thus removes the dependency on data IO
completion.

Reviewed-by: default avatarZheng Liu <wenqing.lz@taobao.com>
Signed-off-by: default avatarJan Kara <jack@suse.cz>
Signed-off-by: default avatar"Theodore Ts'o" <tytso@mit.edu>
parent e5a120ae
Loading
Loading
Loading
Loading
+9 −9
Original line number Diff line number Diff line
@@ -30,15 +30,22 @@
#include <trace/events/jbd2.h>

/*
 * Default IO end handler for temporary BJ_IO buffer_heads.
 * IO end handler for temporary buffer_heads handling writes to the journal.
 */
static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
{
	struct buffer_head *orig_bh = bh->b_private;

	BUFFER_TRACE(bh, "");
	if (uptodate)
		set_buffer_uptodate(bh);
	else
		clear_buffer_uptodate(bh);
	if (orig_bh) {
		clear_bit_unlock(BH_Shadow, &orig_bh->b_state);
		smp_mb__after_clear_bit();
		wake_up_bit(&orig_bh->b_state, BH_Shadow);
	}
	unlock_buffer(bh);
}

@@ -832,6 +839,7 @@ start_journal_io:
		bh = jh2bh(jh);
		clear_buffer_jwrite(bh);
		J_ASSERT_BH(bh, buffer_jbddirty(bh));
		J_ASSERT_BH(bh, !buffer_shadow(bh));

		/* The metadata is now released for reuse, but we need
                   to remember it against this transaction so that when
@@ -839,14 +847,6 @@ start_journal_io:
                   required. */
		JBUFFER_TRACE(jh, "file as BJ_Forget");
		jbd2_journal_file_buffer(jh, commit_transaction, BJ_Forget);
		/*
		 * Wake up any transactions which were waiting for this IO to
		 * complete. The barrier must be here so that changes by
		 * jbd2_journal_file_buffer() take effect before wake_up_bit()
		 * does the waitqueue check.
		 */
		smp_mb();
		wake_up_bit(&bh->b_state, BH_Unshadow);
		JBUFFER_TRACE(jh, "brelse shadowed buffer");
		__brelse(bh);
	}
+2 −0
Original line number Diff line number Diff line
@@ -451,6 +451,7 @@ repeat:
	new_bh->b_size = bh_in->b_size;
	new_bh->b_bdev = journal->j_dev;
	new_bh->b_blocknr = blocknr;
	new_bh->b_private = bh_in;
	set_buffer_mapped(new_bh);
	set_buffer_dirty(new_bh);

@@ -465,6 +466,7 @@ repeat:
	spin_lock(&journal->j_list_lock);
	__jbd2_journal_file_buffer(jh_in, transaction, BJ_Shadow);
	spin_unlock(&journal->j_list_lock);
	set_buffer_shadow(bh_in);
	jbd_unlock_bh_state(bh_in);

	return do_escape | (done_copy_out << 1);
+19 −25
Original line number Diff line number Diff line
@@ -619,6 +619,12 @@ static void warn_dirty_buffer(struct buffer_head *bh)
	       bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr);
}

static int sleep_on_shadow_bh(void *word)
{
	io_schedule();
	return 0;
}

/*
 * If the buffer is already part of the current transaction, then there
 * is nothing we need to do.  If it is already part of a prior
@@ -754,41 +760,29 @@ repeat:
		 * journaled.  If the primary copy is already going to
		 * disk then we cannot do copy-out here. */

		if (jh->b_jlist == BJ_Shadow) {
			DEFINE_WAIT_BIT(wait, &bh->b_state, BH_Unshadow);
			wait_queue_head_t *wqh;

			wqh = bit_waitqueue(&bh->b_state, BH_Unshadow);

		if (buffer_shadow(bh)) {
			JBUFFER_TRACE(jh, "on shadow: sleep");
			jbd_unlock_bh_state(bh);
			/* commit wakes up all shadow buffers after IO */
			for ( ; ; ) {
				prepare_to_wait(wqh, &wait.wait,
						TASK_UNINTERRUPTIBLE);
				if (jh->b_jlist != BJ_Shadow)
					break;
				schedule();
			}
			finish_wait(wqh, &wait.wait);
			wait_on_bit(&bh->b_state, BH_Shadow,
				    sleep_on_shadow_bh, TASK_UNINTERRUPTIBLE);
			goto repeat;
		}

		/* Only do the copy if the currently-owning transaction
		 * still needs it.  If it is on the Forget list, the
		 * committing transaction is past that stage.  The
		 * buffer had better remain locked during the kmalloc,
		 * but that should be true --- we hold the journal lock
		 * still and the buffer is already on the BUF_JOURNAL
		 * list so won't be flushed.
		/*
		 * Only do the copy if the currently-owning transaction still
		 * needs it. If buffer isn't on BJ_Metadata list, the
		 * committing transaction is past that stage (here we use the
		 * fact that BH_Shadow is set under bh_state lock together with
		 * refiling to BJ_Shadow list and at this point we know the
		 * buffer doesn't have BH_Shadow set).
		 *
		 * Subtle point, though: if this is a get_undo_access,
		 * then we will be relying on the frozen_data to contain
		 * the new value of the committed_data record after the
		 * transaction, so we HAVE to force the frozen_data copy
		 * in that case. */

		if (jh->b_jlist != BJ_Forget || force_copy) {
		 * in that case.
		 */
		if (jh->b_jlist == BJ_Metadata || force_copy) {
			JBUFFER_TRACE(jh, "generate frozen data");
			if (!frozen_buffer) {
				JBUFFER_TRACE(jh, "allocate memory for buffer");
+25 −0
Original line number Diff line number Diff line
@@ -244,6 +244,31 @@ typedef struct journal_superblock_s

#include <linux/fs.h>
#include <linux/sched.h>

enum jbd_state_bits {
	BH_JBD			/* Has an attached ext3 journal_head */
	  = BH_PrivateStart,
	BH_JWrite,		/* Being written to log (@@@ DEBUGGING) */
	BH_Freed,		/* Has been freed (truncated) */
	BH_Revoked,		/* Has been revoked from the log */
	BH_RevokeValid,		/* Revoked flag is valid */
	BH_JBDDirty,		/* Is dirty but journaled */
	BH_State,		/* Pins most journal_head state */
	BH_JournalHead,		/* Pins bh->b_private and jh->b_bh */
	BH_Unshadow,		/* Dummy bit, for BJ_Shadow wakeup filtering */
	BH_JBDPrivateStart,	/* First bit available for private use by FS */
};

BUFFER_FNS(JBD, jbd)
BUFFER_FNS(JWrite, jwrite)
BUFFER_FNS(JBDDirty, jbddirty)
TAS_BUFFER_FNS(JBDDirty, jbddirty)
BUFFER_FNS(Revoked, revoked)
TAS_BUFFER_FNS(Revoked, revoked)
BUFFER_FNS(RevokeValid, revokevalid)
TAS_BUFFER_FNS(RevokeValid, revokevalid)
BUFFER_FNS(Freed, freed)

#include <linux/jbd_common.h>

#define J_ASSERT(assert)	BUG_ON(!(assert))
+28 −0
Original line number Diff line number Diff line
@@ -302,6 +302,34 @@ typedef struct journal_superblock_s

#include <linux/fs.h>
#include <linux/sched.h>

enum jbd_state_bits {
	BH_JBD			/* Has an attached ext3 journal_head */
	  = BH_PrivateStart,
	BH_JWrite,		/* Being written to log (@@@ DEBUGGING) */
	BH_Freed,		/* Has been freed (truncated) */
	BH_Revoked,		/* Has been revoked from the log */
	BH_RevokeValid,		/* Revoked flag is valid */
	BH_JBDDirty,		/* Is dirty but journaled */
	BH_State,		/* Pins most journal_head state */
	BH_JournalHead,		/* Pins bh->b_private and jh->b_bh */
	BH_Shadow,		/* IO on shadow buffer is running */
	BH_Verified,		/* Metadata block has been verified ok */
	BH_JBDPrivateStart,	/* First bit available for private use by FS */
};

BUFFER_FNS(JBD, jbd)
BUFFER_FNS(JWrite, jwrite)
BUFFER_FNS(JBDDirty, jbddirty)
TAS_BUFFER_FNS(JBDDirty, jbddirty)
BUFFER_FNS(Revoked, revoked)
TAS_BUFFER_FNS(Revoked, revoked)
BUFFER_FNS(RevokeValid, revokevalid)
TAS_BUFFER_FNS(RevokeValid, revokevalid)
BUFFER_FNS(Freed, freed)
BUFFER_FNS(Shadow, shadow)
BUFFER_FNS(Verified, verified)

#include <linux/jbd_common.h>

#define J_ASSERT(assert)	BUG_ON(!(assert))
Loading