Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 0390131b authored by Frank Mayhar, committed by Theodore Ts'o
Browse files

ext4: Allow ext4 to run without a journal



A few weeks ago I posted a patch for discussion that allowed ext4 to run
without a journal.  Since that time I've integrated the excellent
comments from Andreas and fixed several serious bugs.  We're currently
running with this patch and generating some performance numbers against
both ext2 (with backported reservations code) and ext4 with and without
a journal.  It just so happens that running without a journal is
slightly faster for most everything.

We did
	iozone -T -t 4 -s 2g -r 256k -T -I -i0 -i1 -i2

which creates 4 threads, each of which creates a 2G file and does reads
and writes on it, with a buffer size of 256K, using O_DIRECT for all file
opens to bypass the page cache.  Results:

                     ext2        ext4, default   ext4, no journal
  initial writes   13.0 MB/s        15.4 MB/s          15.7 MB/s
  rewrites         13.1 MB/s        15.6 MB/s          15.9 MB/s
  reads            15.2 MB/s        16.9 MB/s          17.2 MB/s
  re-reads         15.3 MB/s        16.9 MB/s          17.2 MB/s
  random readers    5.6 MB/s         5.6 MB/s           5.7 MB/s
  random writers    5.1 MB/s         5.3 MB/s           5.4 MB/s 

So it seems that, so far, this was a useful exercise.

Signed-off-by: Frank Mayhar <fmayhar@google.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
parent ff7ef329
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -531,11 +531,11 @@ void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb,

	/* We dirtied the bitmap block */
	BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
	err = ext4_journal_dirty_metadata(handle, bitmap_bh);
	err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);

	/* And the group descriptor block */
	BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
	ret = ext4_journal_dirty_metadata(handle, gd_bh);
	ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
	if (!err) err = ret;
	*pdquot_freed_blocks += group_freed;

+63 −20
Original line number Diff line number Diff line
@@ -7,53 +7,96 @@
/*
 * Request jbd2 undo access to @bh on behalf of @handle.
 *
 * NOTE(review): this span is a rendered diff without +/- markers, so the
 * pre-patch and post-patch lines appear together (both initialisations of
 * err, both forms of the abort call).
 *
 * In the guarded form, jbd2_journal_get_undo_access() is called only when
 * ext4_handle_valid(handle) is true; otherwise err keeps its initial 0.
 * If the jbd2 call reports an error, ext4_journal_abort_handle() is called
 * with @where, this function's name, @bh, @handle and the error.
 *
 * Returns err.
 */
int __ext4_journal_get_undo_access(const char *where, handle_t *handle,
				struct buffer_head *bh)
{
	int err = jbd2_journal_get_undo_access(handle, bh);
	int err = 0;

	if (ext4_handle_valid(handle)) {
		err = jbd2_journal_get_undo_access(handle, bh);
		if (err)
		ext4_journal_abort_handle(where, __func__, bh, handle, err);
			ext4_journal_abort_handle(where, __func__, bh,
						  handle, err);
	}
	return err;
}

/*
 * Request jbd2 write access to @bh on behalf of @handle.
 *
 * NOTE(review): this span is a rendered diff without +/- markers, so the
 * pre-patch and post-patch lines appear together.
 *
 * In the guarded form, jbd2_journal_get_write_access() is called only when
 * ext4_handle_valid(handle) is true; otherwise err keeps its initial 0.
 * If the jbd2 call reports an error, ext4_journal_abort_handle() is called
 * with @where, this function's name, @bh, @handle and the error.
 *
 * Returns err.
 */
int __ext4_journal_get_write_access(const char *where, handle_t *handle,
				struct buffer_head *bh)
{
	int err = jbd2_journal_get_write_access(handle, bh);
	int err = 0;

	if (ext4_handle_valid(handle)) {
		err = jbd2_journal_get_write_access(handle, bh);
		if (err)
		ext4_journal_abort_handle(where, __func__, bh, handle, err);
			ext4_journal_abort_handle(where, __func__, bh,
						  handle, err);
	}
	return err;
}

/*
 * Forward a "forget" request for @bh to jbd2 on behalf of @handle.
 *
 * NOTE(review): this span is a rendered diff without +/- markers, so the
 * pre-patch and post-patch lines appear together.
 *
 * In the guarded form, jbd2_journal_forget() is called only when
 * ext4_handle_valid(handle) is true; otherwise err keeps its initial 0.
 * If the jbd2 call reports an error, ext4_journal_abort_handle() is called
 * with @where, this function's name, @bh, @handle and the error.
 *
 * Returns err.
 */
int __ext4_journal_forget(const char *where, handle_t *handle,
				struct buffer_head *bh)
{
	int err = jbd2_journal_forget(handle, bh);
	int err = 0;

	if (ext4_handle_valid(handle)) {
		err = jbd2_journal_forget(handle, bh);
		if (err)
		ext4_journal_abort_handle(where, __func__, bh, handle, err);
			ext4_journal_abort_handle(where, __func__, bh,
						  handle, err);
	}
	return err;
}

/*
 * Forward a revoke request for block @blocknr / @bh to jbd2 on behalf of
 * @handle.
 *
 * NOTE(review): this span is a rendered diff without +/- markers, so the
 * pre-patch and post-patch lines appear together.
 *
 * In the guarded form, jbd2_journal_revoke() is called only when
 * ext4_handle_valid(handle) is true; otherwise err keeps its initial 0.
 * If the jbd2 call reports an error, ext4_journal_abort_handle() is called
 * with @where, this function's name, @bh, @handle and the error.
 *
 * Returns err.
 */
int __ext4_journal_revoke(const char *where, handle_t *handle,
				ext4_fsblk_t blocknr, struct buffer_head *bh)
{
	int err = jbd2_journal_revoke(handle, blocknr, bh);
	int err = 0;

	if (ext4_handle_valid(handle)) {
		err = jbd2_journal_revoke(handle, blocknr, bh);
		if (err)
		ext4_journal_abort_handle(where, __func__, bh, handle, err);
			ext4_journal_abort_handle(where, __func__, bh,
						  handle, err);
	}
	return err;
}

/*
 * Request jbd2 create access to @bh on behalf of @handle.
 *
 * NOTE(review): this span is a rendered diff without +/- markers, so the
 * pre-patch and post-patch lines appear together.
 *
 * In the guarded form, jbd2_journal_get_create_access() is called only
 * when ext4_handle_valid(handle) is true; otherwise err keeps its initial
 * 0. If the jbd2 call reports an error, ext4_journal_abort_handle() is
 * called with @where, this function's name, @bh, @handle and the error.
 *
 * Returns err.
 */
int __ext4_journal_get_create_access(const char *where,
				handle_t *handle, struct buffer_head *bh)
{
	int err = jbd2_journal_get_create_access(handle, bh);
	int err = 0;

	if (ext4_handle_valid(handle)) {
		err = jbd2_journal_get_create_access(handle, bh);
		if (err)
		ext4_journal_abort_handle(where, __func__, bh, handle, err);
			ext4_journal_abort_handle(where, __func__, bh,
						  handle, err);
	}
	return err;
}

/*
 * Mark metadata buffer @bh dirty, through the journal when there is one.
 *
 * NOTE(review): this span is a rendered diff without +/- markers; both the
 * old signature (__ext4_journal_dirty_metadata, no inode argument) and the
 * new one (__ext4_handle_dirty_metadata, with @inode) are shown, as are
 * old and new body lines.
 *
 * With a valid handle, the buffer is handed to
 * jbd2_journal_dirty_metadata(); on error ext4_journal_abort_handle() is
 * called with @where, this function's name, @bh, @handle and the error.
 *
 * Without a valid handle, the buffer is marked dirty directly. If @inode
 * is non-NULL and inode_needs_sync(inode) is true, the buffer is also
 * written out with sync_dirty_buffer().
 *
 * Returns err: 0, the jbd2 error, or -EIO when the post-sync buffer state
 * check below fails.
 */
int __ext4_journal_dirty_metadata(const char *where,
				handle_t *handle, struct buffer_head *bh)
int __ext4_handle_dirty_metadata(const char *where, handle_t *handle,
				 struct inode *inode, struct buffer_head *bh)
{
	int err = jbd2_journal_dirty_metadata(handle, bh);
	int err = 0;

	if (ext4_handle_valid(handle)) {
		err = jbd2_journal_dirty_metadata(handle, bh);
		if (err)
		ext4_journal_abort_handle(where, __func__, bh, handle, err);
			ext4_journal_abort_handle(where, __func__, bh,
						  handle, err);
	} else {
		mark_buffer_dirty(bh);
		if (inode && inode_needs_sync(inode)) {
			sync_dirty_buffer(bh);
			/* Request was issued but buffer is not up to date:
			 * reported below as an I/O error. */
			if (buffer_req(bh) && !buffer_uptodate(bh)) {
				ext4_error(inode->i_sb, __func__,
					   "IO error syncing inode, "
					   "inode=%lu, block=%llu",
					   inode->i_ino,
					   (unsigned long long) bh->b_blocknr);
				err = -EIO;
			}
		}
	}
	return err;
}
+68 −15
Original line number Diff line number Diff line
@@ -122,12 +122,6 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode);
 * been done yet.
 */

/*
 * Forward directly to jbd2_journal_release_buffer() with no validity
 * check on @handle.
 *
 * NOTE(review): the enclosing hunk header (-122,12 +122,6) suggests this
 * is the pre-patch definition being removed; a definition of the same name
 * guarded by ext4_handle_valid() appears later in this diff. Confirm
 * against the applied tree.
 */
static inline void ext4_journal_release_buffer(handle_t *handle,
						struct buffer_head *bh)
{
	jbd2_journal_release_buffer(handle, bh);
}

void ext4_journal_abort_handle(const char *caller, const char *err_fn,
		struct buffer_head *bh, handle_t *handle, int err);

@@ -146,8 +140,8 @@ int __ext4_journal_revoke(const char *where, handle_t *handle,
int __ext4_journal_get_create_access(const char *where,
				handle_t *handle, struct buffer_head *bh);

int __ext4_journal_dirty_metadata(const char *where,
				handle_t *handle, struct buffer_head *bh);
int __ext4_handle_dirty_metadata(const char *where, handle_t *handle,
				 struct inode *inode, struct buffer_head *bh);

#define ext4_journal_get_undo_access(handle, bh) \
	__ext4_journal_get_undo_access(__func__, (handle), (bh))
@@ -157,14 +151,57 @@ int __ext4_journal_dirty_metadata(const char *where,
	__ext4_journal_revoke(__func__, (handle), (blocknr), (bh))
#define ext4_journal_get_create_access(handle, bh) \
	__ext4_journal_get_create_access(__func__, (handle), (bh))
#define ext4_journal_dirty_metadata(handle, bh) \
	__ext4_journal_dirty_metadata(__func__, (handle), (bh))
#define ext4_journal_forget(handle, bh) \
	__ext4_journal_forget(__func__, (handle), (bh))
#define ext4_handle_dirty_metadata(handle, inode, bh) \
	__ext4_handle_dirty_metadata(__func__, (handle), (inode), (bh))

handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks);
int __ext4_journal_stop(const char *where, handle_t *handle);

/*
 * Sentinel handle value: ext4_handle_valid() returns 0 for exactly this
 * pointer, and the handle helpers then skip their jbd2 call.
 * It is a marker only; the helpers never dereference it.
 */
#define EXT4_NOJOURNAL_HANDLE	((handle_t *) 0x1)

/*
 * Tell whether @handle is a real journal handle.
 *
 * Returns 0 when @handle is the EXT4_NOJOURNAL_HANDLE sentinel and 1 for
 * any other value.
 */
static inline int ext4_handle_valid(handle_t *handle)
{
	return handle != EXT4_NOJOURNAL_HANDLE;
}

/*
 * Set the h_sync flag on @handle.
 *
 * Does nothing when ext4_handle_valid() rejects the handle, so the
 * sentinel handle is never dereferenced.
 */
static inline void ext4_handle_sync(handle_t *handle)
{
	if (!ext4_handle_valid(handle))
		return;

	handle->h_sync = 1;
}

/*
 * Pass @bh to jbd2_journal_release_buffer() for @handle.
 *
 * Does nothing when ext4_handle_valid() rejects the handle.
 */
static inline void ext4_handle_release_buffer(handle_t *handle,
						struct buffer_head *bh)
{
	if (!ext4_handle_valid(handle))
		return;

	jbd2_journal_release_buffer(handle, bh);
}

/*
 * Report whether @handle has been aborted.
 *
 * Returns is_handle_aborted(handle) for a valid handle, and 0 when
 * ext4_handle_valid() rejects the handle.
 */
static inline int ext4_handle_is_aborted(handle_t *handle)
{
	if (!ext4_handle_valid(handle))
		return 0;

	return is_handle_aborted(handle);
}

/*
 * Check whether @handle has at least @needed buffer credits.
 *
 * Returns 1 when ext4_handle_valid() rejects the handle (the credit count
 * is not consulted), otherwise 1 if h_buffer_credits >= @needed and 0 if
 * it is smaller.
 */
static inline int ext4_handle_has_enough_credits(handle_t *handle, int needed)
{
	if (!ext4_handle_valid(handle))
		return 1;

	return handle->h_buffer_credits >= needed;
}

/*
 * Pass @bh to jbd2_journal_release_buffer() for @handle, but only when
 * ext4_handle_valid() accepts the handle; otherwise do nothing.
 */
static inline void ext4_journal_release_buffer(handle_t *handle,
						struct buffer_head *bh)
{
	if (!ext4_handle_valid(handle))
		return;

	jbd2_journal_release_buffer(handle, bh);
}

static inline handle_t *ext4_journal_start(struct inode *inode, int nblocks)
{
	return ext4_journal_start_sb(inode->i_sb, nblocks);
@@ -180,27 +217,37 @@ static inline handle_t *ext4_journal_current_handle(void)

/*
 * Forward an extend request for @nblocks to jbd2_journal_extend().
 *
 * Returns the jbd2 result for a valid handle, and 0 without calling jbd2
 * when ext4_handle_valid() rejects the handle.
 */
static inline int ext4_journal_extend(handle_t *handle, int nblocks)
{
	if (!ext4_handle_valid(handle))
		return 0;

	return jbd2_journal_extend(handle, nblocks);
}

/*
 * Forward a restart request for @nblocks to jbd2_journal_restart().
 *
 * Returns the jbd2 result for a valid handle, and 0 without calling jbd2
 * when ext4_handle_valid() rejects the handle.
 */
static inline int ext4_journal_restart(handle_t *handle, int nblocks)
{
	if (!ext4_handle_valid(handle))
		return 0;

	return jbd2_journal_restart(handle, nblocks);
}

/*
 * Return jbd2_journal_blocks_per_page(inode) when EXT4_JOURNAL(inode) is
 * non-NULL, and 0 when it is NULL.
 */
static inline int ext4_journal_blocks_per_page(struct inode *inode)
{
	if (EXT4_JOURNAL(inode) == NULL)
		return 0;

	return jbd2_journal_blocks_per_page(inode);
}

/*
 * Forward to jbd2_journal_force_commit() when @journal is non-NULL.
 *
 * Returns the jbd2 result, or 0 when @journal is NULL.
 */
static inline int ext4_journal_force_commit(journal_t *journal)
{
	if (!journal)
		return 0;

	return jbd2_journal_force_commit(journal);
}

/*
 * Pass the jinode member of @inode's EXT4_I() data to
 * jbd2_journal_file_inode() for @handle.
 *
 * Returns the jbd2 result for a valid handle, and 0 without calling jbd2
 * when ext4_handle_valid() rejects the handle.
 */
static inline int ext4_jbd2_file_inode(handle_t *handle, struct inode *inode)
{
	if (!ext4_handle_valid(handle))
		return 0;

	return jbd2_journal_file_inode(handle, &EXT4_I(inode)->jinode);
}

/* super.c */
@@ -208,6 +255,8 @@ int ext4_force_commit(struct super_block *sb);

static inline int ext4_should_journal_data(struct inode *inode)
{
	if (EXT4_JOURNAL(inode) == NULL)
		return 0;
	if (!S_ISREG(inode->i_mode))
		return 1;
	if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
@@ -219,6 +268,8 @@ static inline int ext4_should_journal_data(struct inode *inode)

static inline int ext4_should_order_data(struct inode *inode)
{
	if (EXT4_JOURNAL(inode) == NULL)
		return 0;
	if (!S_ISREG(inode->i_mode))
		return 0;
	if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL)
@@ -230,6 +281,8 @@ static inline int ext4_should_order_data(struct inode *inode)

static inline int ext4_should_writeback_data(struct inode *inode)
{
	if (EXT4_JOURNAL(inode) == NULL)
		return 0;
	if (!S_ISREG(inode->i_mode))
		return 0;
	if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL)
+7 −5
Original line number Diff line number Diff line
@@ -97,6 +97,8 @@ static int ext4_ext_journal_restart(handle_t *handle, int needed)
{
	int err;

	if (!ext4_handle_valid(handle))
		return 0;
	if (handle->h_buffer_credits > needed)
		return 0;
	err = ext4_journal_extend(handle, needed);
@@ -134,7 +136,7 @@ static int ext4_ext_dirty(handle_t *handle, struct inode *inode,
	int err;
	if (path->p_bh) {
		/* path points to block */
		err = ext4_journal_dirty_metadata(handle, path->p_bh);
		err = ext4_handle_dirty_metadata(handle, inode, path->p_bh);
	} else {
		/* path points to leaf/index in inode body */
		err = ext4_mark_inode_dirty(handle, inode);
@@ -780,7 +782,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
	set_buffer_uptodate(bh);
	unlock_buffer(bh);

	err = ext4_journal_dirty_metadata(handle, bh);
	err = ext4_handle_dirty_metadata(handle, inode, bh);
	if (err)
		goto cleanup;
	brelse(bh);
@@ -859,7 +861,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
		set_buffer_uptodate(bh);
		unlock_buffer(bh);

		err = ext4_journal_dirty_metadata(handle, bh);
		err = ext4_handle_dirty_metadata(handle, inode, bh);
		if (err)
			goto cleanup;
		brelse(bh);
@@ -955,7 +957,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
	set_buffer_uptodate(bh);
	unlock_buffer(bh);

	err = ext4_journal_dirty_metadata(handle, bh);
	err = ext4_handle_dirty_metadata(handle, inode, bh);
	if (err)
		goto out;

@@ -2947,7 +2949,7 @@ void ext4_ext_truncate(struct inode *inode)
	 * transaction synchronous.
	 */
	if (IS_SYNC(inode))
		handle->h_sync = 1;
		ext4_handle_sync(handle);

out_stop:
	up_write(&EXT4_I(inode)->i_data_sem);
+13 −12
Original line number Diff line number Diff line
@@ -253,12 +253,12 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
				spin_unlock(sb_bgl_lock(sbi, flex_group));
			}
		}
		BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata");
		err = ext4_journal_dirty_metadata(handle, bh2);
		BUFFER_TRACE(bh2, "call ext4_handle_dirty_metadata");
		err = ext4_handle_dirty_metadata(handle, NULL, bh2);
		if (!fatal) fatal = err;
	}
	BUFFER_TRACE(bitmap_bh, "call ext4_journal_dirty_metadata");
	err = ext4_journal_dirty_metadata(handle, bitmap_bh);
	BUFFER_TRACE(bitmap_bh, "call ext4_handle_dirty_metadata");
	err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
	if (!fatal)
		fatal = err;
	sb->s_dirt = 1;
@@ -656,15 +656,16 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
						ino, bitmap_bh->b_data)) {
				/* we won it */
				BUFFER_TRACE(bitmap_bh,
					"call ext4_journal_dirty_metadata");
				err = ext4_journal_dirty_metadata(handle,
					"call ext4_handle_dirty_metadata");
				err = ext4_handle_dirty_metadata(handle,
								inode,
								bitmap_bh);
				if (err)
					goto fail;
				goto got;
			}
			/* we lost it */
			jbd2_journal_release_buffer(handle, bitmap_bh);
			ext4_handle_release_buffer(handle, bitmap_bh);

			if (++ino < EXT4_INODES_PER_GROUP(sb))
				goto repeat_in_this_group;
@@ -726,7 +727,8 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
		/* Don't need to dirty bitmap block if we didn't change it */
		if (free) {
			BUFFER_TRACE(block_bh, "dirty block bitmap");
			err = ext4_journal_dirty_metadata(handle, block_bh);
			err = ext4_handle_dirty_metadata(handle,
							NULL, block_bh);
		}

		brelse(block_bh);
@@ -771,8 +773,8 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
	}
	gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
	spin_unlock(sb_bgl_lock(sbi, group));
	BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata");
	err = ext4_journal_dirty_metadata(handle, bh2);
	BUFFER_TRACE(bh2, "call ext4_handle_dirty_metadata");
	err = ext4_handle_dirty_metadata(handle, NULL, bh2);
	if (err) goto fail;

	percpu_counter_dec(&sbi->s_freeinodes_counter);
@@ -825,7 +827,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)

	ext4_set_inode_flags(inode);
	if (IS_DIRSYNC(inode))
		handle->h_sync = 1;
		ext4_handle_sync(handle);
	if (insert_inode_locked(inode) < 0) {
		err = -EINVAL;
		goto fail_drop;
@@ -1028,4 +1030,3 @@ unsigned long ext4_count_dirs(struct super_block * sb)
	}
	return count;
}
Loading