Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit c9de560d authored by Alex Tomas's avatar Alex Tomas Committed by Theodore Ts'o
Browse files

ext4: Add multi block allocator for ext4

parent 1988b51e
Loading
Loading
Loading
Loading
+9 −1
Original line number Original line Diff line number Diff line
@@ -86,9 +86,11 @@ Alex is working on a new set of patches right now.
When mounting an ext4 filesystem, the following options are accepted:
When mounting an ext4 filesystem, the following options are accepted:
(*) == default
(*) == default


extents			ext4 will use extents to address file data.  The
extents		(*)	ext4 will use extents to address file data.  The
			file system will no longer be mountable by ext3.
			file system will no longer be mountable by ext3.


noextents		ext4 will not use extents for newly created files

journal_checksum	Enable checksumming of the journal transactions.
journal_checksum	Enable checksumming of the journal transactions.
			This will allow the recovery code in e2fsck and the
			This will allow the recovery code in e2fsck and the
			kernel to detect corruption in the kernel.  It is a
			kernel to detect corruption in the kernel.  It is a
@@ -206,6 +208,12 @@ nobh (a) cache disk block mapping information
			"nobh" option tries to avoid associating buffer
			"nobh" option tries to avoid associating buffer
			heads (supported only for "writeback" mode).
			heads (supported only for "writeback" mode).


mballoc		(*)	Use the multiple block allocator for block allocation
nomballoc		Disable the multiple block allocator for block allocation.
stripe=n		Number of filesystem blocks that mballoc will try
			to use for allocation size and alignment. For RAID5/6
			systems this should be the number of data
			disks *  RAID chunk size in file system blocks.


Data Mode
Data Mode
---------
---------
+39 −0
Original line number Original line Diff line number Diff line
@@ -857,6 +857,45 @@ CPUs.
The   "procs_blocked" line gives  the  number of  processes currently blocked,
The   "procs_blocked" line gives  the  number of  processes currently blocked,
waiting for I/O to complete.
waiting for I/O to complete.


1.9 Ext4 file system parameters
------------------------------
Each ext4 file system has one directory per partition under /proc/fs/ext4/
# ls /proc/fs/ext4/hdc/
group_prealloc  max_to_scan  mb_groups  mb_history  min_to_scan  order2_req
stats  stream_req

mb_groups:
This file gives the details of the multiblock allocator's buddy cache of free
blocks.

mb_history:
Multiblock allocation history.

stats:
This file indicates whether the multiblock allocator should start collecting
statistics. The statistics are shown during unmount.

group_prealloc:
The multiblock allocator normalizes the block allocation request to
group_prealloc filesystem blocks if we don't have a stripe value set.
The stripe value can be specified at mount time or during mke2fs.

max_to_scan:
How long the multiblock allocator can look for a best extent (in found extents).

min_to_scan:
How long the multiblock allocator must look for a best extent.

order2_req:
The multiblock allocator uses a 2^N search using buddies only for requests
greater than or equal to order2_req. The request size is specified in file
system blocks. A value of 2 indicates that the buddy search is used only if
the requests are greater than or equal to 4 blocks.

stream_req:
Files smaller than stream_req are served by the stream allocator, whose
purpose is to pack requests as close to each other as possible to
produce smooth I/O traffic. A value of 16 indicates that files smaller than 16
filesystem blocks will use group-based preallocation.


------------------------------------------------------------------------------
------------------------------------------------------------------------------
Summary
Summary
+1 −1
Original line number Original line Diff line number Diff line
@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT4DEV_FS) += ext4dev.o


ext4dev-y	:= balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
ext4dev-y	:= balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
		   ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
		   ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
		   ext4_jbd2.o migrate.o
		   ext4_jbd2.o migrate.o mballoc.o


ext4dev-$(CONFIG_EXT4DEV_FS_XATTR)	+= xattr.o xattr_user.o xattr_trusted.o
ext4dev-$(CONFIG_EXT4DEV_FS_XATTR)	+= xattr.o xattr_user.o xattr_trusted.o
ext4dev-$(CONFIG_EXT4DEV_FS_POSIX_ACL)	+= acl.o
ext4dev-$(CONFIG_EXT4DEV_FS_POSIX_ACL)	+= acl.o
+56 −11
Original line number Original line Diff line number Diff line
@@ -577,6 +577,8 @@ void ext4_discard_reservation(struct inode *inode)
	struct ext4_reserve_window_node *rsv;
	struct ext4_reserve_window_node *rsv;
	spinlock_t *rsv_lock = &EXT4_SB(inode->i_sb)->s_rsv_window_lock;
	spinlock_t *rsv_lock = &EXT4_SB(inode->i_sb)->s_rsv_window_lock;


	ext4_mb_discard_inode_preallocations(inode);

	if (!block_i)
	if (!block_i)
		return;
		return;


@@ -785,19 +787,29 @@ void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb,
 * @inode:		inode
 * @inode:		inode
 * @block:		start physical block to free
 * @block:		start physical block to free
 * @count:		number of blocks to free
 * @count:		number of blocks to free
 * @metadata: 		Are these metadata blocks
 */
 */
void ext4_free_blocks(handle_t *handle, struct inode *inode,
void ext4_free_blocks(handle_t *handle, struct inode *inode,
			ext4_fsblk_t block, unsigned long count)
			ext4_fsblk_t block, unsigned long count,
			int metadata)
{
{
	struct super_block * sb;
	struct super_block * sb;
	unsigned long dquot_freed_blocks;
	unsigned long dquot_freed_blocks;


	/* this isn't the right place to decide whether block is metadata
	 * inode.c/extents.c knows better, but for safety ... */
	if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) ||
			ext4_should_journal_data(inode))
		metadata = 1;

	sb = inode->i_sb;
	sb = inode->i_sb;
	if (!sb) {

		printk ("ext4_free_blocks: nonexistent device");
	if (!test_opt(sb, MBALLOC) || !EXT4_SB(sb)->s_group_info)
		return;
		ext4_free_blocks_sb(handle, sb, block, count,
	}
						&dquot_freed_blocks);
	ext4_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks);
	else
		ext4_mb_free_blocks(handle, inode, block, count,
						metadata, &dquot_freed_blocks);
	if (dquot_freed_blocks)
	if (dquot_freed_blocks)
		DQUOT_FREE_BLOCK(inode, dquot_freed_blocks);
		DQUOT_FREE_BLOCK(inode, dquot_freed_blocks);
	return;
	return;
@@ -1576,7 +1588,7 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries)
}
}


/**
/**
 * ext4_new_blocks() -- core block(s) allocation function
 * ext4_new_blocks_old() -- core block(s) allocation function
 * @handle:		handle to this transaction
 * @handle:		handle to this transaction
 * @inode:		file inode
 * @inode:		file inode
 * @goal:		given target block(filesystem wide)
 * @goal:		given target block(filesystem wide)
@@ -1589,7 +1601,7 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries)
 * any specific goal block.
 * any specific goal block.
 *
 *
 */
 */
ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode,
			ext4_fsblk_t goal, unsigned long *count, int *errp)
			ext4_fsblk_t goal, unsigned long *count, int *errp)
{
{
	struct buffer_head *bitmap_bh = NULL;
	struct buffer_head *bitmap_bh = NULL;
@@ -1851,11 +1863,44 @@ ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
ext4_fsblk_t ext4_new_block(handle_t *handle, struct inode *inode,
ext4_fsblk_t ext4_new_block(handle_t *handle, struct inode *inode,
		ext4_fsblk_t goal, int *errp)
		ext4_fsblk_t goal, int *errp)
{
{
	struct ext4_allocation_request ar;
	ext4_fsblk_t ret;

	if (!test_opt(inode->i_sb, MBALLOC)) {
		unsigned long count = 1;
		unsigned long count = 1;
		ret = ext4_new_blocks_old(handle, inode, goal, &count, errp);
		return ret;
	}


	return ext4_new_blocks(handle, inode, goal, &count, errp);
	memset(&ar, 0, sizeof(ar));
	ar.inode = inode;
	ar.goal = goal;
	ar.len = 1;
	ret = ext4_mb_new_blocks(handle, &ar, errp);
	return ret;
}

/*
 * ext4_new_blocks() -- allocate a run of blocks for @inode
 * @handle:		handle to this transaction
 * @inode:		file inode
 * @goal:		given target block (filesystem wide)
 * @count:		in: number of blocks requested; on the mballoc path it
 *			is overwritten with the request's len field after the
 *			allocator returns (presumably the number of blocks
 *			actually allocated -- confirm against
 *			ext4_mb_new_blocks())
 * @errp:		error code, passed through to the allocator
 *
 * Dispatches on the MBALLOC mount option: when it is not set the request
 * goes to ext4_new_blocks_old() unchanged; otherwise a zeroed allocation
 * request carrying only inode, goal and len is handed to
 * ext4_mb_new_blocks().  Returns whatever the chosen allocator returns.
 */
ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
		ext4_fsblk_t goal, unsigned long *count, int *errp)
{
	struct ext4_allocation_request req;
	ext4_fsblk_t first;

	/* multi-block allocator disabled: use the old allocator */
	if (!test_opt(inode->i_sb, MBALLOC))
		return ext4_new_blocks_old(handle, inode, goal, count, errp);

	/* every field not set below stays zero */
	memset(&req, 0, sizeof(req));
	req.inode = inode;
	req.goal = goal;
	req.len = *count;

	first = ext4_mb_new_blocks(handle, &req, errp);

	/* report the request length as left by the allocator */
	*count = req.len;
	return first;
}
}



/**
/**
 * ext4_count_free_blocks() -- count filesystem free blocks
 * ext4_count_free_blocks() -- count filesystem free blocks
 * @sb:		superblock
 * @sb:		superblock
+36 −9
Original line number Original line Diff line number Diff line
@@ -853,7 +853,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
		for (i = 0; i < depth; i++) {
		for (i = 0; i < depth; i++) {
			if (!ablocks[i])
			if (!ablocks[i])
				continue;
				continue;
			ext4_free_blocks(handle, inode, ablocks[i], 1);
			ext4_free_blocks(handle, inode, ablocks[i], 1, 1);
		}
		}
	}
	}
	kfree(ablocks);
	kfree(ablocks);
@@ -1698,7 +1698,7 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
	ext_debug("index is empty, remove it, free block %llu\n", leaf);
	ext_debug("index is empty, remove it, free block %llu\n", leaf);
	bh = sb_find_get_block(inode->i_sb, leaf);
	bh = sb_find_get_block(inode->i_sb, leaf);
	ext4_forget(handle, 1, inode, bh, leaf);
	ext4_forget(handle, 1, inode, bh, leaf);
	ext4_free_blocks(handle, inode, leaf, 1);
	ext4_free_blocks(handle, inode, leaf, 1, 1);
	return err;
	return err;
}
}


@@ -1759,8 +1759,10 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
{
{
	struct buffer_head *bh;
	struct buffer_head *bh;
	unsigned short ee_len =  ext4_ext_get_actual_len(ex);
	unsigned short ee_len =  ext4_ext_get_actual_len(ex);
	int i;
	int i, metadata = 0;


	if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
		metadata = 1;
#ifdef EXTENTS_STATS
#ifdef EXTENTS_STATS
	{
	{
		struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
		struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@@ -1789,7 +1791,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
			bh = sb_find_get_block(inode->i_sb, start + i);
			bh = sb_find_get_block(inode->i_sb, start + i);
			ext4_forget(handle, 0, inode, bh, start + i);
			ext4_forget(handle, 0, inode, bh, start + i);
		}
		}
		ext4_free_blocks(handle, inode, start, num);
		ext4_free_blocks(handle, inode, start, num, metadata);
	} else if (from == le32_to_cpu(ex->ee_block)
	} else if (from == le32_to_cpu(ex->ee_block)
		   && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) {
		   && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) {
		printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n",
		printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n",
@@ -2287,6 +2289,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
	ext4_fsblk_t goal, newblock;
	ext4_fsblk_t goal, newblock;
	int err = 0, depth, ret;
	int err = 0, depth, ret;
	unsigned long allocated = 0;
	unsigned long allocated = 0;
	struct ext4_allocation_request ar;


	__clear_bit(BH_New, &bh_result->b_state);
	__clear_bit(BH_New, &bh_result->b_state);
	ext_debug("blocks %u/%lu requested for inode %u\n",
	ext_debug("blocks %u/%lu requested for inode %u\n",
@@ -2397,8 +2400,15 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
	if (S_ISREG(inode->i_mode) && (!EXT4_I(inode)->i_block_alloc_info))
	if (S_ISREG(inode->i_mode) && (!EXT4_I(inode)->i_block_alloc_info))
		ext4_init_block_alloc_info(inode);
		ext4_init_block_alloc_info(inode);


	/* allocate new block */
	/* find neighbour allocated blocks */
	goal = ext4_ext_find_goal(inode, path, iblock);
	ar.lleft = iblock;
	err = ext4_ext_search_left(inode, path, &ar.lleft, &ar.pleft);
	if (err)
		goto out2;
	ar.lright = iblock;
	err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright);
	if (err)
		goto out2;


	/*
	/*
	 * See if request is beyond maximum number of blocks we can have in
	 * See if request is beyond maximum number of blocks we can have in
@@ -2421,7 +2431,18 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
		allocated = le16_to_cpu(newex.ee_len);
		allocated = le16_to_cpu(newex.ee_len);
	else
	else
		allocated = max_blocks;
		allocated = max_blocks;
	newblock = ext4_new_blocks(handle, inode, goal, &allocated, &err);

	/* allocate new block */
	ar.inode = inode;
	ar.goal = ext4_ext_find_goal(inode, path, iblock);
	ar.logical = iblock;
	ar.len = allocated;
	if (S_ISREG(inode->i_mode))
		ar.flags = EXT4_MB_HINT_DATA;
	else
		/* disable in-core preallocation for non-regular files */
		ar.flags = 0;
	newblock = ext4_mb_new_blocks(handle, &ar, &err);
	if (!newblock)
	if (!newblock)
		goto out2;
		goto out2;
	ext_debug("allocate new block: goal %llu, found %llu/%lu\n",
	ext_debug("allocate new block: goal %llu, found %llu/%lu\n",
@@ -2429,14 +2450,17 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,


	/* try to insert new extent into found leaf and return */
	/* try to insert new extent into found leaf and return */
	ext4_ext_store_pblock(&newex, newblock);
	ext4_ext_store_pblock(&newex, newblock);
	newex.ee_len = cpu_to_le16(allocated);
	newex.ee_len = cpu_to_le16(ar.len);
	if (create == EXT4_CREATE_UNINITIALIZED_EXT)  /* Mark uninitialized */
	if (create == EXT4_CREATE_UNINITIALIZED_EXT)  /* Mark uninitialized */
		ext4_ext_mark_uninitialized(&newex);
		ext4_ext_mark_uninitialized(&newex);
	err = ext4_ext_insert_extent(handle, inode, path, &newex);
	err = ext4_ext_insert_extent(handle, inode, path, &newex);
	if (err) {
	if (err) {
		/* free data blocks we just allocated */
		/* free data blocks we just allocated */
		/* not a good idea to call discard here directly,
		 * but otherwise we'd need to call it every free() */
		ext4_mb_discard_inode_preallocations(inode);
		ext4_free_blocks(handle, inode, ext_pblock(&newex),
		ext4_free_blocks(handle, inode, ext_pblock(&newex),
					le16_to_cpu(newex.ee_len));
					le16_to_cpu(newex.ee_len), 0);
		goto out2;
		goto out2;
	}
	}


@@ -2445,6 +2469,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,


	/* previous routine could use block we allocated */
	/* previous routine could use block we allocated */
	newblock = ext_pblock(&newex);
	newblock = ext_pblock(&newex);
	allocated = le16_to_cpu(newex.ee_len);
outnew:
outnew:
	__set_bit(BH_New, &bh_result->b_state);
	__set_bit(BH_New, &bh_result->b_state);


@@ -2496,6 +2521,8 @@ void ext4_ext_truncate(struct inode * inode, struct page *page)
	down_write(&EXT4_I(inode)->i_data_sem);
	down_write(&EXT4_I(inode)->i_data_sem);
	ext4_ext_invalidate_cache(inode);
	ext4_ext_invalidate_cache(inode);


	ext4_mb_discard_inode_preallocations(inode);

	/*
	/*
	 * TODO: optimization is possible here.
	 * TODO: optimization is possible here.
	 * Probably we need not scan at all,
	 * Probably we need not scan at all,
Loading