Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit bb189247 authored by Jan Kara's avatar Jan Kara
Browse files

jbd: Fix oops in journal_remove_journal_head()



journal_remove_journal_head() can oops when trying to access journal_head
returned by bh2jh(). This is caused for example by the following race:

	TASK1					TASK2
  journal_commit_transaction()
    ...
    processing t_forget list
      __journal_refile_buffer(jh);
      if (!jh->b_transaction) {
        jbd_unlock_bh_state(bh);
					journal_try_to_free_buffers()
					  journal_grab_journal_head(bh)
					  jbd_lock_bh_state(bh)
					  __journal_try_to_free_buffer()
					  journal_put_journal_head(jh)
        journal_remove_journal_head(bh);

journal_put_journal_head() in TASK2 sees that b_jcount == 0 and buffer is not
part of any transaction and thus frees journal_head before TASK1 gets to doing
so. Note that even buffer_head can be released by try_to_free_buffers() after
journal_put_journal_head() which adds even larger opportunity for oops (but I
didn't see this happen in reality).

Fix the problem by making transactions hold their own journal_head reference
(in b_jcount). That way we don't have to remove journal_head explicitely via
journal_remove_journal_head() and instead just remove journal_head when
b_jcount drops to zero. The result of this is that [__]journal_refile_buffer(),
[__]journal_unfile_buffer(), and __journal_remove_checkpoint() can free
journal_head which needs modification of a few callers. Also we have to be
careful because once journal_head is removed, buffer_head might be freed as
well. So we have to get our own buffer_head reference where it matters.

Signed-off-by: default avatarJan Kara <jack@suse.cz>
parent 2c2ea945
Loading
Loading
Loading
Loading
+15 −12
Original line number Diff line number Diff line
@@ -96,10 +96,14 @@ static int __try_to_free_cp_buf(struct journal_head *jh)

	if (jh->b_jlist == BJ_None && !buffer_locked(bh) &&
	    !buffer_dirty(bh) && !buffer_write_io_error(bh)) {
		/*
		 * Get our reference so that bh cannot be freed before
		 * we unlock it
		 */
		get_bh(bh);
		JBUFFER_TRACE(jh, "remove from checkpoint list");
		ret = __journal_remove_checkpoint(jh) + 1;
		jbd_unlock_bh_state(bh);
		journal_remove_journal_head(bh);
		BUFFER_TRACE(bh, "release");
		__brelse(bh);
	} else {
@@ -221,8 +225,8 @@ static int __wait_cp_io(journal_t *journal, transaction_t *transaction)
			spin_lock(&journal->j_list_lock);
			goto restart;
		}
		if (buffer_locked(bh)) {
		get_bh(bh);
		if (buffer_locked(bh)) {
			spin_unlock(&journal->j_list_lock);
			jbd_unlock_bh_state(bh);
			wait_on_buffer(bh);
@@ -241,7 +245,6 @@ static int __wait_cp_io(journal_t *journal, transaction_t *transaction)
		 */
		released = __journal_remove_checkpoint(jh);
		jbd_unlock_bh_state(bh);
		journal_remove_journal_head(bh);
		__brelse(bh);
	}

@@ -305,12 +308,12 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
		ret = 1;
		if (unlikely(buffer_write_io_error(bh)))
			ret = -EIO;
		get_bh(bh);
		J_ASSERT_JH(jh, !buffer_jbddirty(bh));
		BUFFER_TRACE(bh, "remove from checkpoint");
		__journal_remove_checkpoint(jh);
		spin_unlock(&journal->j_list_lock);
		jbd_unlock_bh_state(bh);
		journal_remove_journal_head(bh);
		__brelse(bh);
	} else {
		/*
@@ -526,9 +529,9 @@ int cleanup_journal_tail(journal_t *journal)
/*
 * journal_clean_one_cp_list
 *
 * Find all the written-back checkpoint buffers in the given list and release them.
 * Find all the written-back checkpoint buffers in the given list and release
 * them.
 *
 * Called with the journal locked.
 * Called with j_list_lock held.
 * Returns number of bufers reaped (for debug)
 */
@@ -635,8 +638,8 @@ int __journal_clean_checkpoint_list(journal_t *journal)
 * checkpoint lists.
 *
 * The function returns 1 if it frees the transaction, 0 otherwise.
 * The function can free jh and bh.
 *
 * This function is called with the journal locked.
 * This function is called with j_list_lock held.
 * This function is called with jbd_lock_bh_state(jh2bh(jh))
 */
@@ -655,13 +658,14 @@ int __journal_remove_checkpoint(struct journal_head *jh)
	}
	journal = transaction->t_journal;

	JBUFFER_TRACE(jh, "removing from transaction");
	__buffer_unlink(jh);
	jh->b_cp_transaction = NULL;
	journal_put_journal_head(jh);

	if (transaction->t_checkpoint_list != NULL ||
	    transaction->t_checkpoint_io_list != NULL)
		goto out;
	JBUFFER_TRACE(jh, "transaction has no more buffers");

	/*
	 * There is one special case to worry about: if we have just pulled the
@@ -672,10 +676,8 @@ int __journal_remove_checkpoint(struct journal_head *jh)
	 * The locking here around t_state is a bit sleazy.
	 * See the comment at the end of journal_commit_transaction().
	 */
	if (transaction->t_state != T_FINISHED) {
		JBUFFER_TRACE(jh, "belongs to running/committing transaction");
	if (transaction->t_state != T_FINISHED)
		goto out;
	}

	/* OK, that was the last buffer for the transaction: we can now
	   safely remove this transaction from the log */
@@ -687,7 +689,6 @@ int __journal_remove_checkpoint(struct journal_head *jh)
	wake_up(&journal->j_wait_logspace);
	ret = 1;
out:
	JBUFFER_TRACE(jh, "exit");
	return ret;
}

@@ -706,6 +707,8 @@ void __journal_insert_checkpoint(struct journal_head *jh,
	J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh)));
	J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);

	/* Get reference for checkpointing transaction */
	journal_grab_journal_head(jh2bh(jh));
	jh->b_cp_transaction = transaction;

	if (!transaction->t_checkpoint_list) {
+21 −25
Original line number Diff line number Diff line
@@ -258,10 +258,6 @@ static int journal_submit_data_buffers(journal_t *journal,
			jbd_unlock_bh_state(bh);
			if (locked)
				unlock_buffer(bh);
			journal_remove_journal_head(bh);
			/* One for our safety reference, other for
			 * journal_remove_journal_head() */
			put_bh(bh);
			release_data_buffer(bh);
		}

@@ -455,14 +451,9 @@ void journal_commit_transaction(journal_t *journal)
		}
		if (buffer_jbd(bh) && bh2jh(bh) == jh &&
		    jh->b_transaction == commit_transaction &&
		    jh->b_jlist == BJ_Locked) {
		    jh->b_jlist == BJ_Locked)
			__journal_unfile_buffer(jh);
		jbd_unlock_bh_state(bh);
			journal_remove_journal_head(bh);
			put_bh(bh);
		} else {
			jbd_unlock_bh_state(bh);
		}
		release_data_buffer(bh);
		cond_resched_lock(&journal->j_list_lock);
	}
@@ -807,10 +798,16 @@ void journal_commit_transaction(journal_t *journal)
	while (commit_transaction->t_forget) {
		transaction_t *cp_transaction;
		struct buffer_head *bh;
		int try_to_free = 0;

		jh = commit_transaction->t_forget;
		spin_unlock(&journal->j_list_lock);
		bh = jh2bh(jh);
		/*
		 * Get a reference so that bh cannot be freed before we are
		 * done with it.
		 */
		get_bh(bh);
		jbd_lock_bh_state(bh);
		J_ASSERT_JH(jh,	jh->b_transaction == commit_transaction ||
			jh->b_transaction == journal->j_running_transaction);
@@ -868,28 +865,27 @@ void journal_commit_transaction(journal_t *journal)
			__journal_insert_checkpoint(jh, commit_transaction);
			if (is_journal_aborted(journal))
				clear_buffer_jbddirty(bh);
			JBUFFER_TRACE(jh, "refile for checkpoint writeback");
			__journal_refile_buffer(jh);
			jbd_unlock_bh_state(bh);
		} else {
			J_ASSERT_BH(bh, !buffer_dirty(bh));
			/* The buffer on BJ_Forget list and not jbddirty means
			/*
			 * The buffer on BJ_Forget list and not jbddirty means
			 * it has been freed by this transaction and hence it
			 * could not have been reallocated until this
			 * transaction has committed. *BUT* it could be
			 * reallocated once we have written all the data to
			 * disk and before we process the buffer on BJ_Forget
			 * list. */
			 * list.
			 */
			if (!jh->b_next_transaction)
				try_to_free = 1;
		}
		JBUFFER_TRACE(jh, "refile or unfile freed buffer");
		__journal_refile_buffer(jh);
			if (!jh->b_transaction) {
		jbd_unlock_bh_state(bh);
				 /* needs a brelse */
				journal_remove_journal_head(bh);
		if (try_to_free)
			release_buffer_page(bh);
			} else
				jbd_unlock_bh_state(bh);
		}
		else
			__brelse(bh);
		cond_resched_lock(&journal->j_list_lock);
	}
	spin_unlock(&journal->j_list_lock);
+31 −64
Original line number Diff line number Diff line
@@ -1803,10 +1803,9 @@ static void journal_free_journal_head(struct journal_head *jh)
 * When a buffer has its BH_JBD bit set it is immune from being released by
 * core kernel code, mainly via ->b_count.
 *
 * A journal_head may be detached from its buffer_head when the journal_head's
 * b_transaction, b_cp_transaction and b_next_transaction pointers are NULL.
 * Various places in JBD call journal_remove_journal_head() to indicate that the
 * journal_head can be dropped if needed.
 * A journal_head is detached from its buffer_head when the journal_head's
 * b_jcount reaches zero. Running transaction (b_transaction) and checkpoint
 * transaction (b_cp_transaction) hold their references to b_jcount.
 *
 * Various places in the kernel want to attach a journal_head to a buffer_head
 * _before_ attaching the journal_head to a transaction.  To protect the
@@ -1819,17 +1818,16 @@ static void journal_free_journal_head(struct journal_head *jh)
 *	(Attach a journal_head if needed.  Increments b_jcount)
 *	struct journal_head *jh = journal_add_journal_head(bh);
 *	...
 *      (Get another reference for transaction)
 *      journal_grab_journal_head(bh);
 *      jh->b_transaction = xxx;
 *      (Put original reference)
 *      journal_put_journal_head(jh);
 *
 * Now, the journal_head's b_jcount is zero, but it is safe from being released
 * because it has a non-zero b_transaction.
 */

/*
 * Give a buffer_head a journal_head.
 *
 * Doesn't need the journal lock.
 * May sleep.
 */
struct journal_head *journal_add_journal_head(struct buffer_head *bh)
@@ -1893,61 +1891,29 @@ static void __journal_remove_journal_head(struct buffer_head *bh)
	struct journal_head *jh = bh2jh(bh);

	J_ASSERT_JH(jh, jh->b_jcount >= 0);

	get_bh(bh);
	if (jh->b_jcount == 0) {
		if (jh->b_transaction == NULL &&
				jh->b_next_transaction == NULL &&
				jh->b_cp_transaction == NULL) {
	J_ASSERT_JH(jh, jh->b_transaction == NULL);
	J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
	J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);
	J_ASSERT_JH(jh, jh->b_jlist == BJ_None);
	J_ASSERT_BH(bh, buffer_jbd(bh));
	J_ASSERT_BH(bh, jh2bh(jh) == bh);
	BUFFER_TRACE(bh, "remove journal_head");
	if (jh->b_frozen_data) {
				printk(KERN_WARNING "%s: freeing "
						"b_frozen_data\n",
						__func__);
		printk(KERN_WARNING "%s: freeing b_frozen_data\n", __func__);
		jbd_free(jh->b_frozen_data, bh->b_size);
	}
	if (jh->b_committed_data) {
				printk(KERN_WARNING "%s: freeing "
						"b_committed_data\n",
						__func__);
		printk(KERN_WARNING "%s: freeing b_committed_data\n", __func__);
		jbd_free(jh->b_committed_data, bh->b_size);
	}
	bh->b_private = NULL;
	jh->b_bh = NULL;	/* debug, really */
	clear_buffer_jbd(bh);
			__brelse(bh);
	journal_free_journal_head(jh);
		} else {
			BUFFER_TRACE(bh, "journal_head was locked");
		}
	}
}

/*
 * journal_remove_journal_head(): if the buffer isn't attached to a transaction
 * and has a zero b_jcount then remove and release its journal_head.   If we did
 * see that the buffer is not used by any transaction we also "logically"
 * decrement ->b_count.
 *
 * We in fact take an additional increment on ->b_count as a convenience,
 * because the caller usually wants to do additional things with the bh
 * after calling here.
 * The caller of journal_remove_journal_head() *must* run __brelse(bh) at some
 * time.  Once the caller has run __brelse(), the buffer is eligible for
 * reaping by try_to_free_buffers().
 */
void journal_remove_journal_head(struct buffer_head *bh)
{
	jbd_lock_bh_journal_head(bh);
	__journal_remove_journal_head(bh);
	jbd_unlock_bh_journal_head(bh);
}

/*
 * Drop a reference on the passed journal_head.  If it fell to zero then try to
 * Drop a reference on the passed journal_head.  If it fell to zero then
 * release the journal_head from the buffer_head.
 */
void journal_put_journal_head(struct journal_head *jh)
@@ -1957,10 +1923,11 @@ void journal_put_journal_head(struct journal_head *jh)
	jbd_lock_bh_journal_head(bh);
	J_ASSERT_JH(jh, jh->b_jcount > 0);
	--jh->b_jcount;
	if (!jh->b_jcount && !jh->b_transaction) {
	if (!jh->b_jcount) {
		__journal_remove_journal_head(bh);
		jbd_unlock_bh_journal_head(bh);
		__brelse(bh);
	}
	} else
		jbd_unlock_bh_journal_head(bh);
}

+37 −36
Original line number Diff line number Diff line
@@ -696,7 +696,6 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
	if (!jh->b_transaction) {
		JBUFFER_TRACE(jh, "no transaction");
		J_ASSERT_JH(jh, !jh->b_next_transaction);
		jh->b_transaction = transaction;
		JBUFFER_TRACE(jh, "file as BJ_Reserved");
		spin_lock(&journal->j_list_lock);
		__journal_file_buffer(jh, transaction, BJ_Reserved);
@@ -818,7 +817,6 @@ int journal_get_create_access(handle_t *handle, struct buffer_head *bh)
		 * committed and so it's safe to clear the dirty bit.
		 */
		clear_buffer_dirty(jh2bh(jh));
		jh->b_transaction = transaction;

		/* first access by this transaction */
		jh->b_modified = 0;
@@ -1069,8 +1067,9 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
				ret = -EIO;
				goto no_journal;
			}

			if (jh->b_transaction != NULL) {
			/* We might have slept so buffer could be refiled now */
			if (jh->b_transaction != NULL &&
			    jh->b_transaction != handle->h_transaction) {
				JBUFFER_TRACE(jh, "unfile from commit");
				__journal_temp_unlink_buffer(jh);
				/* It still points to the committing
@@ -1091,8 +1090,6 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
		if (jh->b_jlist != BJ_SyncData && jh->b_jlist != BJ_Locked) {
			JBUFFER_TRACE(jh, "not on correct data list: unfile");
			J_ASSERT_JH(jh, jh->b_jlist != BJ_Shadow);
			__journal_temp_unlink_buffer(jh);
			jh->b_transaction = handle->h_transaction;
			JBUFFER_TRACE(jh, "file as data");
			__journal_file_buffer(jh, handle->h_transaction,
						BJ_SyncData);
@@ -1300,8 +1297,6 @@ int journal_forget (handle_t *handle, struct buffer_head *bh)
			__journal_file_buffer(jh, transaction, BJ_Forget);
		} else {
			__journal_unfile_buffer(jh);
			journal_remove_journal_head(bh);
			__brelse(bh);
			if (!buffer_jbd(bh)) {
				spin_unlock(&journal->j_list_lock);
				jbd_unlock_bh_state(bh);
@@ -1622,19 +1617,32 @@ static void __journal_temp_unlink_buffer(struct journal_head *jh)
		mark_buffer_dirty(bh);	/* Expose it to the VM */
}

/*
 * Remove buffer from all transactions.
 *
 * Called with bh_state lock and j_list_lock
 *
 * jh and bh may be already freed when this function returns.
 */
void __journal_unfile_buffer(struct journal_head *jh)
{
	__journal_temp_unlink_buffer(jh);
	jh->b_transaction = NULL;
	journal_put_journal_head(jh);
}

void journal_unfile_buffer(journal_t *journal, struct journal_head *jh)
{
	jbd_lock_bh_state(jh2bh(jh));
	struct buffer_head *bh = jh2bh(jh);

	/* Get reference so that buffer cannot be freed before we unlock it */
	get_bh(bh);
	jbd_lock_bh_state(bh);
	spin_lock(&journal->j_list_lock);
	__journal_unfile_buffer(jh);
	spin_unlock(&journal->j_list_lock);
	jbd_unlock_bh_state(jh2bh(jh));
	jbd_unlock_bh_state(bh);
	__brelse(bh);
}

/*
@@ -1661,16 +1669,12 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
			/* A written-back ordered data buffer */
			JBUFFER_TRACE(jh, "release data");
			__journal_unfile_buffer(jh);
			journal_remove_journal_head(bh);
			__brelse(bh);
		}
	} else if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) {
		/* written-back checkpointed metadata buffer */
		if (jh->b_jlist == BJ_None) {
			JBUFFER_TRACE(jh, "remove from checkpoint list");
			__journal_remove_checkpoint(jh);
			journal_remove_journal_head(bh);
			__brelse(bh);
		}
	}
	spin_unlock(&journal->j_list_lock);
@@ -1733,7 +1737,7 @@ int journal_try_to_free_buffers(journal_t *journal,
		/*
		 * We take our own ref against the journal_head here to avoid
		 * having to add tons of locking around each instance of
		 * journal_remove_journal_head() and journal_put_journal_head().
		 * journal_put_journal_head().
		 */
		jh = journal_grab_journal_head(bh);
		if (!jh)
@@ -1770,10 +1774,9 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
	int may_free = 1;
	struct buffer_head *bh = jh2bh(jh);

	__journal_unfile_buffer(jh);

	if (jh->b_cp_transaction) {
		JBUFFER_TRACE(jh, "on running+cp transaction");
		__journal_temp_unlink_buffer(jh);
		/*
		 * We don't want to write the buffer anymore, clear the
		 * bit so that we don't confuse checks in
@@ -1784,8 +1787,7 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
		may_free = 0;
	} else {
		JBUFFER_TRACE(jh, "on running transaction");
		journal_remove_journal_head(bh);
		__brelse(bh);
		__journal_unfile_buffer(jh);
	}
	return may_free;
}
@@ -2070,6 +2072,8 @@ void __journal_file_buffer(struct journal_head *jh,

	if (jh->b_transaction)
		__journal_temp_unlink_buffer(jh);
	else
		journal_grab_journal_head(bh);
	jh->b_transaction = transaction;

	switch (jlist) {
@@ -2127,9 +2131,10 @@ void journal_file_buffer(struct journal_head *jh,
 * already started to be used by a subsequent transaction, refile the
 * buffer on that transaction's metadata list.
 *
 * Called under journal->j_list_lock
 *
 * Called under j_list_lock
 * Called under jbd_lock_bh_state(jh2bh(jh))
 *
 * jh and bh may be already free when this function returns
 */
void __journal_refile_buffer(struct journal_head *jh)
{
@@ -2153,6 +2158,11 @@ void __journal_refile_buffer(struct journal_head *jh)

	was_dirty = test_clear_buffer_jbddirty(bh);
	__journal_temp_unlink_buffer(jh);
	/*
	 * We set b_transaction here because b_next_transaction will inherit
	 * our jh reference and thus __journal_file_buffer() must not take a
	 * new one.
	 */
	jh->b_transaction = jh->b_next_transaction;
	jh->b_next_transaction = NULL;
	if (buffer_freed(bh))
@@ -2169,30 +2179,21 @@ void __journal_refile_buffer(struct journal_head *jh)
}

/*
 * For the unlocked version of this call, also make sure that any
 * hanging journal_head is cleaned up if necessary.
 * __journal_refile_buffer() with necessary locking added. We take our bh
 * reference so that we can safely unlock bh.
 *
 * __journal_refile_buffer is usually called as part of a single locked
 * operation on a buffer_head, in which the caller is probably going to
 * be hooking the journal_head onto other lists.  In that case it is up
 * to the caller to remove the journal_head if necessary.  For the
 * unlocked journal_refile_buffer call, the caller isn't going to be
 * doing anything else to the buffer so we need to do the cleanup
 * ourselves to avoid a jh leak.
 *
 * *** The journal_head may be freed by this call! ***
 * The jh and bh may be freed by this call.
 */
void journal_refile_buffer(journal_t *journal, struct journal_head *jh)
{
	struct buffer_head *bh = jh2bh(jh);

	/* Get reference so that buffer cannot be freed before we unlock it */
	get_bh(bh);
	jbd_lock_bh_state(bh);
	spin_lock(&journal->j_list_lock);

	__journal_refile_buffer(jh);
	jbd_unlock_bh_state(bh);
	journal_remove_journal_head(bh);

	spin_unlock(&journal->j_list_lock);
	__brelse(bh);
}
+0 −1
Original line number Diff line number Diff line
@@ -940,7 +940,6 @@ extern int journal_force_commit(journal_t *);
 */
struct journal_head *journal_add_journal_head(struct buffer_head *bh);
struct journal_head *journal_grab_journal_head(struct buffer_head *bh);
void journal_remove_journal_head(struct buffer_head *bh);
void journal_put_journal_head(struct journal_head *jh);

/*