Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 937cb207 authored by Zhang Yi's avatar Zhang Yi Committed by Greg Kroah-Hartman
Browse files

jbd2: recheck chechpointing non-dirty buffer



[ Upstream commit c2d6fd9d6f35079f1669f0100f05b46708c74b7f ]

There is a long-standing metadata corruption issue that happens from
time to time, but it's very difficult to reproduce and analyse, benefit
from the JBD2_CYCLE_RECORD option, we found out that the problem is the
checkpointing process miss to write out some buffers which are raced by
another do_get_write_access(). Looks below for detail.

jbd2_log_do_checkpoint() //transaction X
 //buffer A is dirty and not belones to any transaction
 __buffer_relink_io() //move it to the IO list
 __flush_batch()
  write_dirty_buffer()
                             do_get_write_access()
                             clear_buffer_dirty
                             __jbd2_journal_file_buffer()
                             //add buffer A to a new transaction Y
   lock_buffer(bh)
   //doesn't write out
 __jbd2_journal_remove_checkpoint()
 //finish checkpoint except buffer A
 //filesystem corrupt if the new transaction Y isn't fully write out.

Due to the t_checkpoint_list walking loop in jbd2_log_do_checkpoint()
have already handles waiting for buffers under IO and re-added new
transaction to complete commit, and it also removing cleaned buffers,
this makes sure the list will eventually get empty. So it's fine to
leave buffers on the t_checkpoint_list while flushing out and completely
stop using the t_checkpoint_io_list.

Cc: stable@vger.kernel.org
Suggested-by: default avatarJan Kara <jack@suse.cz>
Signed-off-by: default avatarZhang Yi <yi.zhang@huawei.com>
Tested-by: default avatarZhihao Cheng <chengzhihao1@huawei.com>
Reviewed-by: default avatarJan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/20230606135928.434610-2-yi.zhang@huaweicloud.com


Signed-off-by: default avatarTheodore Ts'o <tytso@mit.edu>
Stable-dep-of: e34c8dd238d0 ("jbd2: Fix wrongly judgement for buffer head removing while doing checkpoint")
Signed-off-by: default avatarSasha Levin <sashal@kernel.org>
parent acc9a81f
Loading
Loading
Loading
Loading
+29 −73
Original line number Original line Diff line number Diff line
@@ -57,28 +57,6 @@ static inline void __buffer_unlink(struct journal_head *jh)
	}
	}
}
}


/*
 * Move a buffer from the checkpoint list to the checkpoint io list
 *
 * Called with j_list_lock held
 */
static inline void __buffer_relink_io(struct journal_head *jh)
{
	transaction_t *transaction = jh->b_cp_transaction;

	__buffer_unlink_first(jh);

	if (!transaction->t_checkpoint_io_list) {
		jh->b_cpnext = jh->b_cpprev = jh;
	} else {
		jh->b_cpnext = transaction->t_checkpoint_io_list;
		jh->b_cpprev = transaction->t_checkpoint_io_list->b_cpprev;
		jh->b_cpprev->b_cpnext = jh;
		jh->b_cpnext->b_cpprev = jh;
	}
	transaction->t_checkpoint_io_list = jh;
}

/*
/*
 * Try to release a checkpointed buffer from its transaction.
 * Try to release a checkpointed buffer from its transaction.
 * Returns 1 if we released it and 2 if we also released the
 * Returns 1 if we released it and 2 if we also released the
@@ -190,6 +168,7 @@ __flush_batch(journal_t *journal, int *batch_count)
		struct buffer_head *bh = journal->j_chkpt_bhs[i];
		struct buffer_head *bh = journal->j_chkpt_bhs[i];
		BUFFER_TRACE(bh, "brelse");
		BUFFER_TRACE(bh, "brelse");
		__brelse(bh);
		__brelse(bh);
		journal->j_chkpt_bhs[i] = NULL;
	}
	}
	*batch_count = 0;
	*batch_count = 0;
}
}
@@ -249,6 +228,11 @@ int jbd2_log_do_checkpoint(journal_t *journal)
		jh = transaction->t_checkpoint_list;
		jh = transaction->t_checkpoint_list;
		bh = jh2bh(jh);
		bh = jh2bh(jh);


		/*
		 * The buffer may be writing back, or flushing out in the
		 * last couple of cycles, or re-adding into a new transaction,
		 * need to check it again until it's unlocked.
		 */
		if (buffer_locked(bh)) {
		if (buffer_locked(bh)) {
			get_bh(bh);
			get_bh(bh);
			spin_unlock(&journal->j_list_lock);
			spin_unlock(&journal->j_list_lock);
@@ -294,28 +278,32 @@ int jbd2_log_do_checkpoint(journal_t *journal)
		}
		}
		if (!buffer_dirty(bh)) {
		if (!buffer_dirty(bh)) {
			BUFFER_TRACE(bh, "remove from checkpoint");
			BUFFER_TRACE(bh, "remove from checkpoint");
			if (__jbd2_journal_remove_checkpoint(jh))
			/*
				/* The transaction was released; we're done */
			 * If the transaction was released or the checkpoint
			 * list was empty, we're done.
			 */
			if (__jbd2_journal_remove_checkpoint(jh) ||
			    !transaction->t_checkpoint_list)
				goto out;
				goto out;
			continue;
		} else {
		}
			/*
			/*
		 * Important: we are about to write the buffer, and
			 * We are about to write the buffer, it could be
		 * possibly block, while still holding the journal
			 * raced by some other transaction shrink or buffer
		 * lock.  We cannot afford to let the transaction
			 * re-log logic once we release the j_list_lock,
		 * logic start messing around with this buffer before
			 * leave it on the checkpoint list and check status
		 * we write it to disk, as that would break
			 * again to make sure it's clean.
		 * recoverability.
			 */
			 */
			BUFFER_TRACE(bh, "queue");
			BUFFER_TRACE(bh, "queue");
			get_bh(bh);
			get_bh(bh);
			J_ASSERT_BH(bh, !buffer_jwrite(bh));
			J_ASSERT_BH(bh, !buffer_jwrite(bh));
			journal->j_chkpt_bhs[batch_count++] = bh;
			journal->j_chkpt_bhs[batch_count++] = bh;
		__buffer_relink_io(jh);
			transaction->t_chp_stats.cs_written++;
			transaction->t_chp_stats.cs_written++;
			transaction->t_checkpoint_list = jh->b_cpnext;
		}

		if ((batch_count == JBD2_NR_BATCH) ||
		if ((batch_count == JBD2_NR_BATCH) ||
		    need_resched() ||
		    need_resched() || spin_needbreak(&journal->j_list_lock) ||
		    spin_needbreak(&journal->j_list_lock))
		    jh2bh(transaction->t_checkpoint_list) == journal->j_chkpt_bhs[0])
			goto unlock_and_flush;
			goto unlock_and_flush;
	}
	}


@@ -329,38 +317,6 @@ int jbd2_log_do_checkpoint(journal_t *journal)
			goto restart;
			goto restart;
	}
	}


	/*
	 * Now we issued all of the transaction's buffers, let's deal
	 * with the buffers that are out for I/O.
	 */
restart2:
	/* Did somebody clean up the transaction in the meanwhile? */
	if (journal->j_checkpoint_transactions != transaction ||
	    transaction->t_tid != this_tid)
		goto out;

	while (transaction->t_checkpoint_io_list) {
		jh = transaction->t_checkpoint_io_list;
		bh = jh2bh(jh);
		if (buffer_locked(bh)) {
			get_bh(bh);
			spin_unlock(&journal->j_list_lock);
			wait_on_buffer(bh);
			/* the journal_head may have gone by now */
			BUFFER_TRACE(bh, "brelse");
			__brelse(bh);
			spin_lock(&journal->j_list_lock);
			goto restart2;
		}

		/*
		 * Now in whatever state the buffer currently is, we
		 * know that it has been written out and so we can
		 * drop it from the list
		 */
		if (__jbd2_journal_remove_checkpoint(jh))
			break;
	}
out:
out:
	spin_unlock(&journal->j_list_lock);
	spin_unlock(&journal->j_list_lock);
	result = jbd2_cleanup_journal_tail(journal);
	result = jbd2_cleanup_journal_tail(journal);