Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit c9de560d authored by Alex Tomas's avatar Alex Tomas Committed by Theodore Ts'o
Browse files

ext4: Add multi block allocator for ext4

parent 1988b51e
Loading
Loading
Loading
Loading
+9 −1
Original line number Diff line number Diff line
@@ -86,9 +86,11 @@ Alex is working on a new set of patches right now.
When mounting an ext4 filesystem, the following option are accepted:
(*) == default

extents			ext4 will use extents to address file data.  The
extents		(*)	ext4 will use extents to address file data.  The
			file system will no longer be mountable by ext3.

noextents		ext4 will not use extents for newly created files

journal_checksum	Enable checksumming of the journal transactions.
			This will allow the recovery code in e2fsck and the
			kernel to detect corruption in the kernel.  It is a
@@ -206,6 +208,12 @@ nobh (a) cache disk block mapping information
			"nobh" option tries to avoid associating buffer
			heads (supported only for "writeback" mode).

mballoc		(*)	Use the multiple block allocator for block allocation
nomballoc		disabled multiple block allocator for block allocation.
stripe=n		Number of filesystem blocks that mballoc will try
			to use for allocation size and alignment. For RAID5/6
			systems this should be the number of data
			disks *  RAID chunk size in file system blocks.

Data Mode
---------
+39 −0
Original line number Diff line number Diff line
@@ -857,6 +857,45 @@ CPUs.
The   "procs_blocked" line gives  the  number of  processes currently blocked,
waiting for I/O to complete.

1.9 Ext4 file system parameters
------------------------------
Ext4 file system have one directory per partition under /proc/fs/ext4/
# ls /proc/fs/ext4/hdc/
group_prealloc  max_to_scan  mb_groups  mb_history  min_to_scan  order2_req
stats  stream_req

mb_groups:
This file gives the details of mutiblock allocator buddy cache of free blocks

mb_history:
Multiblock allocation history.

stats:
This file indicate whether the multiblock allocator should start collecting
statistics. The statistics are shown during unmount

group_prealloc:
The multiblock allocator normalize the block allocation request to
group_prealloc filesystem blocks if we don't have strip value set.
The stripe value can be specified at mount time or during mke2fs.

max_to_scan:
How long multiblock allocator can look for a best extent (in found extents)

min_to_scan:
How long multiblock allocator  must look for a best extent

order2_req:
Multiblock allocator use  2^N search using buddies only for requests greater
than or equal to order2_req. The request size is specfied in file system
blocks. A value of 2 indicate only if the requests are greater than or equal
to 4 blocks.

stream_req:
Files smaller than stream_req are served by the stream allocator, whose
purpose is to pack requests as close each to other as possible to
produce smooth I/O traffic. Avalue of 16 indicate that file smaller than 16
filesystem block size will use group based preallocation.

------------------------------------------------------------------------------
Summary
+1 −1
Original line number Diff line number Diff line
@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT4DEV_FS) += ext4dev.o

ext4dev-y	:= balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
		   ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
		   ext4_jbd2.o migrate.o
		   ext4_jbd2.o migrate.o mballoc.o

ext4dev-$(CONFIG_EXT4DEV_FS_XATTR)	+= xattr.o xattr_user.o xattr_trusted.o
ext4dev-$(CONFIG_EXT4DEV_FS_POSIX_ACL)	+= acl.o
+56 −11
Original line number Diff line number Diff line
@@ -577,6 +577,8 @@ void ext4_discard_reservation(struct inode *inode)
	struct ext4_reserve_window_node *rsv;
	spinlock_t *rsv_lock = &EXT4_SB(inode->i_sb)->s_rsv_window_lock;

	ext4_mb_discard_inode_preallocations(inode);

	if (!block_i)
		return;

@@ -785,19 +787,29 @@ void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb,
 * @inode:		inode
 * @block:		start physical block to free
 * @count:		number of blocks to count
 * @metadata: 		Are these metadata blocks
 */
void ext4_free_blocks(handle_t *handle, struct inode *inode,
			ext4_fsblk_t block, unsigned long count)
			ext4_fsblk_t block, unsigned long count,
			int metadata)
{
	struct super_block * sb;
	unsigned long dquot_freed_blocks;

	/* this isn't the right place to decide whether block is metadata
	 * inode.c/extents.c knows better, but for safety ... */
	if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) ||
			ext4_should_journal_data(inode))
		metadata = 1;

	sb = inode->i_sb;
	if (!sb) {
		printk ("ext4_free_blocks: nonexistent device");
		return;
	}
	ext4_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks);

	if (!test_opt(sb, MBALLOC) || !EXT4_SB(sb)->s_group_info)
		ext4_free_blocks_sb(handle, sb, block, count,
						&dquot_freed_blocks);
	else
		ext4_mb_free_blocks(handle, inode, block, count,
						metadata, &dquot_freed_blocks);
	if (dquot_freed_blocks)
		DQUOT_FREE_BLOCK(inode, dquot_freed_blocks);
	return;
@@ -1576,7 +1588,7 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries)
}

/**
 * ext4_new_blocks() -- core block(s) allocation function
 * ext4_new_blocks_old() -- core block(s) allocation function
 * @handle:		handle to this transaction
 * @inode:		file inode
 * @goal:		given target block(filesystem wide)
@@ -1589,7 +1601,7 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries)
 * any specific goal block.
 *
 */
ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode,
			ext4_fsblk_t goal, unsigned long *count, int *errp)
{
	struct buffer_head *bitmap_bh = NULL;
@@ -1851,11 +1863,44 @@ ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
ext4_fsblk_t ext4_new_block(handle_t *handle, struct inode *inode,
		ext4_fsblk_t goal, int *errp)
{
	struct ext4_allocation_request ar;
	ext4_fsblk_t ret;

	if (!test_opt(inode->i_sb, MBALLOC)) {
		unsigned long count = 1;
		ret = ext4_new_blocks_old(handle, inode, goal, &count, errp);
		return ret;
	}

	return ext4_new_blocks(handle, inode, goal, &count, errp);
	memset(&ar, 0, sizeof(ar));
	ar.inode = inode;
	ar.goal = goal;
	ar.len = 1;
	ret = ext4_mb_new_blocks(handle, &ar, errp);
	return ret;
}

ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
		ext4_fsblk_t goal, unsigned long *count, int *errp)
{
	struct ext4_allocation_request ar;
	ext4_fsblk_t ret;

	if (!test_opt(inode->i_sb, MBALLOC)) {
		ret = ext4_new_blocks_old(handle, inode, goal, count, errp);
		return ret;
	}

	memset(&ar, 0, sizeof(ar));
	ar.inode = inode;
	ar.goal = goal;
	ar.len = *count;
	ret = ext4_mb_new_blocks(handle, &ar, errp);
	*count = ar.len;
	return ret;
}


/**
 * ext4_count_free_blocks() -- count filesystem free blocks
 * @sb:		superblock
+36 −9
Original line number Diff line number Diff line
@@ -853,7 +853,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
		for (i = 0; i < depth; i++) {
			if (!ablocks[i])
				continue;
			ext4_free_blocks(handle, inode, ablocks[i], 1);
			ext4_free_blocks(handle, inode, ablocks[i], 1, 1);
		}
	}
	kfree(ablocks);
@@ -1698,7 +1698,7 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
	ext_debug("index is empty, remove it, free block %llu\n", leaf);
	bh = sb_find_get_block(inode->i_sb, leaf);
	ext4_forget(handle, 1, inode, bh, leaf);
	ext4_free_blocks(handle, inode, leaf, 1);
	ext4_free_blocks(handle, inode, leaf, 1, 1);
	return err;
}

@@ -1759,8 +1759,10 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
{
	struct buffer_head *bh;
	unsigned short ee_len =  ext4_ext_get_actual_len(ex);
	int i;
	int i, metadata = 0;

	if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
		metadata = 1;
#ifdef EXTENTS_STATS
	{
		struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@@ -1789,7 +1791,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
			bh = sb_find_get_block(inode->i_sb, start + i);
			ext4_forget(handle, 0, inode, bh, start + i);
		}
		ext4_free_blocks(handle, inode, start, num);
		ext4_free_blocks(handle, inode, start, num, metadata);
	} else if (from == le32_to_cpu(ex->ee_block)
		   && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) {
		printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n",
@@ -2287,6 +2289,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
	ext4_fsblk_t goal, newblock;
	int err = 0, depth, ret;
	unsigned long allocated = 0;
	struct ext4_allocation_request ar;

	__clear_bit(BH_New, &bh_result->b_state);
	ext_debug("blocks %u/%lu requested for inode %u\n",
@@ -2397,8 +2400,15 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
	if (S_ISREG(inode->i_mode) && (!EXT4_I(inode)->i_block_alloc_info))
		ext4_init_block_alloc_info(inode);

	/* allocate new block */
	goal = ext4_ext_find_goal(inode, path, iblock);
	/* find neighbour allocated blocks */
	ar.lleft = iblock;
	err = ext4_ext_search_left(inode, path, &ar.lleft, &ar.pleft);
	if (err)
		goto out2;
	ar.lright = iblock;
	err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright);
	if (err)
		goto out2;

	/*
	 * See if request is beyond maximum number of blocks we can have in
@@ -2421,7 +2431,18 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
		allocated = le16_to_cpu(newex.ee_len);
	else
		allocated = max_blocks;
	newblock = ext4_new_blocks(handle, inode, goal, &allocated, &err);

	/* allocate new block */
	ar.inode = inode;
	ar.goal = ext4_ext_find_goal(inode, path, iblock);
	ar.logical = iblock;
	ar.len = allocated;
	if (S_ISREG(inode->i_mode))
		ar.flags = EXT4_MB_HINT_DATA;
	else
		/* disable in-core preallocation for non-regular files */
		ar.flags = 0;
	newblock = ext4_mb_new_blocks(handle, &ar, &err);
	if (!newblock)
		goto out2;
	ext_debug("allocate new block: goal %llu, found %llu/%lu\n",
@@ -2429,14 +2450,17 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,

	/* try to insert new extent into found leaf and return */
	ext4_ext_store_pblock(&newex, newblock);
	newex.ee_len = cpu_to_le16(allocated);
	newex.ee_len = cpu_to_le16(ar.len);
	if (create == EXT4_CREATE_UNINITIALIZED_EXT)  /* Mark uninitialized */
		ext4_ext_mark_uninitialized(&newex);
	err = ext4_ext_insert_extent(handle, inode, path, &newex);
	if (err) {
		/* free data blocks we just allocated */
		/* not a good idea to call discard here directly,
		 * but otherwise we'd need to call it every free() */
		ext4_mb_discard_inode_preallocations(inode);
		ext4_free_blocks(handle, inode, ext_pblock(&newex),
					le16_to_cpu(newex.ee_len));
					le16_to_cpu(newex.ee_len), 0);
		goto out2;
	}

@@ -2445,6 +2469,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,

	/* previous routine could use block we allocated */
	newblock = ext_pblock(&newex);
	allocated = le16_to_cpu(newex.ee_len);
outnew:
	__set_bit(BH_New, &bh_result->b_state);

@@ -2496,6 +2521,8 @@ void ext4_ext_truncate(struct inode * inode, struct page *page)
	down_write(&EXT4_I(inode)->i_data_sem);
	ext4_ext_invalidate_cache(inode);

	ext4_mb_discard_inode_preallocations(inode);

	/*
	 * TODO: optimization is possible here.
	 * Probably we need not scan at all,
Loading