Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 2bc20d09 authored by Linus Torvalds
Browse files
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs-2.6:
  jbd: fix race between write_metadata_buffer and get_write_access
  ext3: Get rid of extenddisksize parameter of ext3_get_blocks_handle()
  jbd: Fix a race between checkpointing code and journal_get_write_access()
  ext3: Fix truncation of symlinks after failed write
  jbd: Fail to load a journal if it is too short
parents c7425eb4 f1015c44
Loading
Loading
Loading
Loading
+1 −2
Original line number Diff line number Diff line
@@ -130,8 +130,7 @@ static int ext3_readdir(struct file * filp,
		struct buffer_head *bh = NULL;

		map_bh.b_state = 0;
		err = ext3_get_blocks_handle(NULL, inode, blk, 1,
						&map_bh, 0, 0);
		err = ext3_get_blocks_handle(NULL, inode, blk, 1, &map_bh, 0);
		if (err > 0) {
			pgoff_t index = map_bh.b_blocknr >>
					(PAGE_CACHE_SHIFT - inode->i_blkbits);
+13 −19
Original line number Diff line number Diff line
@@ -788,7 +788,7 @@ err_out:
int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
		sector_t iblock, unsigned long maxblocks,
		struct buffer_head *bh_result,
		int create, int extend_disksize)
		int create)
{
	int err = -EIO;
	int offsets[4];
@@ -911,13 +911,6 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
	if (!err)
		err = ext3_splice_branch(handle, inode, iblock,
					partial, indirect_blks, count);
	/*
	 * i_disksize growing is protected by truncate_mutex.  Don't forget to
	 * protect it if you're about to implement concurrent
	 * ext3_get_block() -bzzz
	*/
	if (!err && extend_disksize && inode->i_size > ei->i_disksize)
		ei->i_disksize = inode->i_size;
	mutex_unlock(&ei->truncate_mutex);
	if (err)
		goto cleanup;
@@ -972,7 +965,7 @@ static int ext3_get_block(struct inode *inode, sector_t iblock,
	}

	ret = ext3_get_blocks_handle(handle, inode, iblock,
					max_blocks, bh_result, create, 0);
					max_blocks, bh_result, create);
	if (ret > 0) {
		bh_result->b_size = (ret << inode->i_blkbits);
		ret = 0;
@@ -1005,7 +998,7 @@ struct buffer_head *ext3_getblk(handle_t *handle, struct inode *inode,
	dummy.b_blocknr = -1000;
	buffer_trace_init(&dummy.b_history);
	err = ext3_get_blocks_handle(handle, inode, block, 1,
					&dummy, create, 1);
					&dummy, create);
	/*
	 * ext3_get_blocks_handle() returns number of blocks
	 * mapped. 0 in case of a HOLE.
@@ -1193,15 +1186,16 @@ write_begin_failed:
		 * i_size_read because we hold i_mutex.
		 *
		 * Add inode to orphan list in case we crash before truncate
		 * finishes.
		 * finishes. Do this only if ext3_can_truncate() agrees so
		 * that orphan processing code is happy.
		 */
		if (pos + len > inode->i_size)
		if (pos + len > inode->i_size && ext3_can_truncate(inode))
			ext3_orphan_add(handle, inode);
		ext3_journal_stop(handle);
		unlock_page(page);
		page_cache_release(page);
		if (pos + len > inode->i_size)
			vmtruncate(inode, inode->i_size);
			ext3_truncate(inode);
	}
	if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
		goto retry;
@@ -1287,7 +1281,7 @@ static int ext3_ordered_write_end(struct file *file,
	 * There may be allocated blocks outside of i_size because
	 * we failed to copy some data. Prepare for truncate.
	 */
	if (pos + len > inode->i_size)
	if (pos + len > inode->i_size && ext3_can_truncate(inode))
		ext3_orphan_add(handle, inode);
	ret2 = ext3_journal_stop(handle);
	if (!ret)
@@ -1296,7 +1290,7 @@ static int ext3_ordered_write_end(struct file *file,
	page_cache_release(page);

	if (pos + len > inode->i_size)
		vmtruncate(inode, inode->i_size);
		ext3_truncate(inode);
	return ret ? ret : copied;
}

@@ -1315,14 +1309,14 @@ static int ext3_writeback_write_end(struct file *file,
	 * There may be allocated blocks outside of i_size because
	 * we failed to copy some data. Prepare for truncate.
	 */
	if (pos + len > inode->i_size)
	if (pos + len > inode->i_size && ext3_can_truncate(inode))
		ext3_orphan_add(handle, inode);
	ret = ext3_journal_stop(handle);
	unlock_page(page);
	page_cache_release(page);

	if (pos + len > inode->i_size)
		vmtruncate(inode, inode->i_size);
		ext3_truncate(inode);
	return ret ? ret : copied;
}

@@ -1358,7 +1352,7 @@ static int ext3_journalled_write_end(struct file *file,
	 * There may be allocated blocks outside of i_size because
	 * we failed to copy some data. Prepare for truncate.
	 */
	if (pos + len > inode->i_size)
	if (pos + len > inode->i_size && ext3_can_truncate(inode))
		ext3_orphan_add(handle, inode);
	EXT3_I(inode)->i_state |= EXT3_STATE_JDATA;
	if (inode->i_size > EXT3_I(inode)->i_disksize) {
@@ -1375,7 +1369,7 @@ static int ext3_journalled_write_end(struct file *file,
	page_cache_release(page);

	if (pos + len > inode->i_size)
		vmtruncate(inode, inode->i_size);
		ext3_truncate(inode);
	return ret ? ret : copied;
}

+17 −9
Original line number Diff line number Diff line
@@ -287,6 +287,7 @@ int journal_write_metadata_buffer(transaction_t *transaction,
	struct page *new_page;
	unsigned int new_offset;
	struct buffer_head *bh_in = jh2bh(jh_in);
	journal_t *journal = transaction->t_journal;

	/*
	 * The buffer really shouldn't be locked: only the current committing
@@ -300,6 +301,11 @@ int journal_write_metadata_buffer(transaction_t *transaction,
	J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in));

	new_bh = alloc_buffer_head(GFP_NOFS|__GFP_NOFAIL);
	/* keep subsequent assertions sane */
	new_bh->b_state = 0;
	init_buffer(new_bh, NULL, NULL);
	atomic_set(&new_bh->b_count, 1);
	new_jh = journal_add_journal_head(new_bh);	/* This sleeps */

	/*
	 * If a new transaction has already done a buffer copy-out, then
@@ -361,14 +367,6 @@ repeat:
		kunmap_atomic(mapped_data, KM_USER0);
	}

	/* keep subsequent assertions sane */
	new_bh->b_state = 0;
	init_buffer(new_bh, NULL, NULL);
	atomic_set(&new_bh->b_count, 1);
	jbd_unlock_bh_state(bh_in);

	new_jh = journal_add_journal_head(new_bh);	/* This sleeps */

	set_bh_page(new_bh, new_page, new_offset);
	new_jh->b_transaction = NULL;
	new_bh->b_size = jh2bh(jh_in)->b_size;
@@ -385,7 +383,11 @@ repeat:
	 * copying is moved to the transaction's shadow queue.
	 */
	JBUFFER_TRACE(jh_in, "file as BJ_Shadow");
	journal_file_buffer(jh_in, transaction, BJ_Shadow);
	spin_lock(&journal->j_list_lock);
	__journal_file_buffer(jh_in, transaction, BJ_Shadow);
	spin_unlock(&journal->j_list_lock);
	jbd_unlock_bh_state(bh_in);

	JBUFFER_TRACE(new_jh, "file as BJ_IO");
	journal_file_buffer(new_jh, transaction, BJ_IO);

@@ -848,6 +850,12 @@ static int journal_reset(journal_t *journal)

	first = be32_to_cpu(sb->s_first);
	last = be32_to_cpu(sb->s_maxlen);
	if (first + JFS_MIN_JOURNAL_BLOCKS > last + 1) {
		printk(KERN_ERR "JBD: Journal too short (blocks %lu-%lu).\n",
		       first, last);
		journal_fail_superblock(journal);
		return -EINVAL;
	}

	journal->j_first = first;
	journal->j_last = last;
+35 −33
Original line number Diff line number Diff line
@@ -489,34 +489,15 @@ void journal_unlock_updates (journal_t *journal)
	wake_up(&journal->j_wait_transaction_locked);
}

/*
 * Report any unexpected dirty buffers which turn up.  Normally those
 * indicate an error, but they can occur if the user is running (say)
 * tune2fs to modify the live filesystem, so we need the option of
 * continuing as gracefully as possible.  #
 *
 * The caller should already hold the journal lock and
 * j_list_lock spinlock: most callers will need those anyway
 * in order to probe the buffer's journaling state safely.
 */
static void jbd_unexpected_dirty_buffer(struct journal_head *jh)
static void warn_dirty_buffer(struct buffer_head *bh)
{
	int jlist;

	/* If this buffer is one which might reasonably be dirty
	 * --- ie. data, or not part of this journal --- then
	 * we're OK to leave it alone, but otherwise we need to
	 * move the dirty bit to the journal's own internal
	 * JBDDirty bit. */
	jlist = jh->b_jlist;

	if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
	    jlist == BJ_Shadow || jlist == BJ_Forget) {
		struct buffer_head *bh = jh2bh(jh);
	char b[BDEVNAME_SIZE];

		if (test_clear_buffer_dirty(bh))
			set_buffer_jbddirty(bh);
	}
	printk(KERN_WARNING
	       "JBD: Spotted dirty metadata buffer (dev = %s, blocknr = %llu). "
	       "There's a risk of filesystem corruption in case of system "
	       "crash.\n",
	       bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr);
}

/*
@@ -583,14 +564,16 @@ repeat:
			if (jh->b_next_transaction)
				J_ASSERT_JH(jh, jh->b_next_transaction ==
							transaction);
			warn_dirty_buffer(bh);
		}
		/*
		 * In any case we need to clean the dirty flag and we must
		 * do it under the buffer lock to be sure we don't race
		 * with running write-out.
		 */
		JBUFFER_TRACE(jh, "Unexpected dirty buffer");
		jbd_unexpected_dirty_buffer(jh);
		JBUFFER_TRACE(jh, "Journalling dirty buffer");
		clear_buffer_dirty(bh);
		set_buffer_jbddirty(bh);
	}

	unlock_buffer(bh);
@@ -826,6 +809,15 @@ int journal_get_create_access(handle_t *handle, struct buffer_head *bh)
	J_ASSERT_JH(jh, buffer_locked(jh2bh(jh)));

	if (jh->b_transaction == NULL) {
		/*
		 * Previous journal_forget() could have left the buffer
		 * with jbddirty bit set because it was being committed. When
		 * the commit finished, we've filed the buffer for
		 * checkpointing and marked it dirty. Now we are reallocating
		 * the buffer so the transaction freeing it must have
		 * committed and so it's safe to clear the dirty bit.
		 */
		clear_buffer_dirty(jh2bh(jh));
		jh->b_transaction = transaction;

		/* first access by this transaction */
@@ -1782,8 +1774,13 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)

	if (jh->b_cp_transaction) {
		JBUFFER_TRACE(jh, "on running+cp transaction");
		/*
		 * We don't want to write the buffer anymore, clear the
		 * bit so that we don't confuse checks in
		 * __journal_file_buffer
		 */
		clear_buffer_dirty(bh);
		__journal_file_buffer(jh, transaction, BJ_Forget);
		clear_buffer_jbddirty(bh);
		may_free = 0;
	} else {
		JBUFFER_TRACE(jh, "on running transaction");
@@ -2041,12 +2038,17 @@ void __journal_file_buffer(struct journal_head *jh,
	if (jh->b_transaction && jh->b_jlist == jlist)
		return;

	/* The following list of buffer states needs to be consistent
	 * with __jbd_unexpected_dirty_buffer()'s handling of dirty
	 * state. */

	if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
	    jlist == BJ_Shadow || jlist == BJ_Forget) {
		/*
		 * For metadata buffers, we track dirty bit in buffer_jbddirty
		 * instead of buffer_dirty. We should not see a dirty bit set
		 * here because we clear it in do_get_write_access but e.g.
		 * tune2fs can modify the sb and set the dirty bit at any time
		 * so we try to gracefully handle that.
		 */
		if (buffer_dirty(bh))
			warn_dirty_buffer(bh);
		if (test_clear_buffer_dirty(bh) ||
		    test_clear_buffer_jbddirty(bh))
			was_dirty = 1;
+1 −1
Original line number Diff line number Diff line
@@ -874,7 +874,7 @@ struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *);
struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *);
int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
	sector_t iblock, unsigned long maxblocks, struct buffer_head *bh_result,
	int create, int extend_disksize);
	int create);

extern struct inode *ext3_iget(struct super_block *, unsigned long);
extern int  ext3_write_inode (struct inode *, int);