
Commit 5e745b04 authored by Aneesh Kumar K.V, committed by Theodore Ts'o

ext4: Fix small file fragmentation



For small file block allocations, mballoc uses per-cpu prealloc
space.  Use the goal block when searching for the right prealloc
space.  Also make sure ext4_da_writepages tries to write
all the pages for small files in a single attempt.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
parent 91246c00
fs/ext4/inode.c: +15 −6
@@ -2282,13 +2282,12 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
 static int ext4_da_writepages(struct address_space *mapping,
 			      struct writeback_control *wbc)
 {
-	struct inode *inode = mapping->host;
 	handle_t *handle = NULL;
-	int needed_blocks;
-	int ret = 0;
-	long to_write;
 	loff_t range_start = 0;
-	long pages_skipped = 0;
+	struct inode *inode = mapping->host;
+	int needed_blocks, ret = 0, nr_to_writebump = 0;
+	long to_write, pages_skipped = 0;
+	struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
 
 	/*
 	 * No pages to write? This is mainly a kludge to avoid starting
@@ -2297,6 +2296,16 @@ static int ext4_da_writepages(struct address_space *mapping,
 	 */
 	if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
 		return 0;
+	/*
+	 * Make sure nr_to_write is >= sbi->s_mb_stream_request
+	 * This make sure small files blocks are allocated in
+	 * single attempt. This ensure that small files
+	 * get less fragmented.
+	 */
+	if (wbc->nr_to_write < sbi->s_mb_stream_request) {
+		nr_to_writebump = sbi->s_mb_stream_request - wbc->nr_to_write;
+		wbc->nr_to_write = sbi->s_mb_stream_request;
+	}
 
 	if (!wbc->range_cyclic)
 		/*
@@ -2377,7 +2386,7 @@ static int ext4_da_writepages(struct address_space *mapping,
 	}
 
 out_writepages:
-	wbc->nr_to_write = to_write;
+	wbc->nr_to_write = to_write - nr_to_writebump;
 	wbc->range_start = range_start;
 	return ret;
 }
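
To make the bump-and-restore pattern in the hunk above concrete, here is a minimal standalone sketch in plain C. It is not kernel code: struct wbc and writepages() are hypothetical stand-ins for struct writeback_control and ext4_da_writepages(), and the threshold of 16 blocks mirrors ext4's default s_mb_stream_request (MB_DEFAULT_STREAM_THRESHOLD).

#include <stdio.h>

/* hypothetical stand-in for struct writeback_control */
struct wbc {
	long nr_to_write;
};

/* mirrors sbi->s_mb_stream_request; ext4's default is 16 blocks */
static const long stream_request = 16;

/* sketch of the bump-and-restore pattern from the hunk above */
static void writepages(struct wbc *wbc, long dirty_pages)
{
	long nr_to_writebump = 0;
	long to_write, written;

	/* raise nr_to_write so a small file is flushed in one attempt */
	if (wbc->nr_to_write < stream_request) {
		nr_to_writebump = stream_request - wbc->nr_to_write;
		wbc->nr_to_write = stream_request;
	}

	to_write = wbc->nr_to_write;
	written = dirty_pages < to_write ? dirty_pages : to_write;
	to_write -= written;	/* pretend we wrote this many pages */

	/* subtract the bump so the caller's accounting stays consistent */
	wbc->nr_to_write = to_write - nr_to_writebump;
}

int main(void)
{
	struct wbc wbc = { .nr_to_write = 4 };

	writepages(&wbc, 3);	/* a 3-page file; the caller asked for 4 */
	printf("nr_to_write on return: %ld\n", wbc.nr_to_write);	/* prints 1 */
	return 0;
}

The restore step matters because the caller's writeback accounting is based on the nr_to_write it passed in, so the artificial bump must be subtracted again before returning, exactly as the patch does with to_write - nr_to_writebump.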
fs/ext4/mballoc.c: +46 −7
@@ -3281,6 +3281,35 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
 	mb_debug("use %u/%u from group pa %p\n", pa->pa_lstart-len, len, pa);
 }
 
+/*
+ * Return the prealloc space that have minimal distance
+ * from the goal block. @cpa is the prealloc
+ * space that is having currently known minimal distance
+ * from the goal block.
+ */
+static struct ext4_prealloc_space *
+ext4_mb_check_group_pa(ext4_fsblk_t goal_block,
+			struct ext4_prealloc_space *pa,
+			struct ext4_prealloc_space *cpa)
+{
+	ext4_fsblk_t cur_distance, new_distance;
+
+	if (cpa == NULL) {
+		atomic_inc(&pa->pa_count);
+		return pa;
+	}
+	cur_distance = abs(goal_block - cpa->pa_pstart);
+	new_distance = abs(goal_block - pa->pa_pstart);
+
+	if (cur_distance < new_distance)
+		return cpa;
+
+	/* drop the previous reference */
+	atomic_dec(&cpa->pa_count);
+	atomic_inc(&pa->pa_count);
+	return pa;
+}
+
 /*
  * search goal blocks in preallocated space
  */
@@ -3290,7 +3319,8 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
 	int order, i;
 	struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
 	struct ext4_locality_group *lg;
-	struct ext4_prealloc_space *pa;
+	struct ext4_prealloc_space *pa, *cpa = NULL;
+	ext4_fsblk_t goal_block;
 
 	/* only data can be preallocated */
 	if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
@@ -3333,6 +3363,13 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
 		/* The max size of hash table is PREALLOC_TB_SIZE */
 		order = PREALLOC_TB_SIZE - 1;
 
+	goal_block = ac->ac_g_ex.fe_group * EXT4_BLOCKS_PER_GROUP(ac->ac_sb) +
+		     ac->ac_g_ex.fe_start +
+		     le32_to_cpu(EXT4_SB(ac->ac_sb)->s_es->s_first_data_block);
+	/*
+	 * search for the prealloc space that is having
+	 * minimal distance from the goal block.
+	 */
 	for (i = order; i < PREALLOC_TB_SIZE; i++) {
 		rcu_read_lock();
 		list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i],
@@ -3340,17 +3377,19 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
 			spin_lock(&pa->pa_lock);
 			if (pa->pa_deleted == 0 &&
 					pa->pa_free >= ac->ac_o_ex.fe_len) {
-				atomic_inc(&pa->pa_count);
-				ext4_mb_use_group_pa(ac, pa);
-				spin_unlock(&pa->pa_lock);
-				ac->ac_criteria = 20;
-				rcu_read_unlock();
-				return 1;
+
+				cpa = ext4_mb_check_group_pa(goal_block,
+								pa, cpa);
 			}
 			spin_unlock(&pa->pa_lock);
 		}
 		rcu_read_unlock();
 	}
	if (cpa) {
+		ext4_mb_use_group_pa(ac, cpa);
+		ac->ac_criteria = 20;
+		return 1;
+	}
 	return 0;
 }
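
The selection logic this hunk adds can be exercised outside the kernel. The sketch below is a simplified model, not kernel code: struct pa stands in for struct ext4_prealloc_space, the RCU list walk, locking and pa_count refcounting are omitted, and all block numbers are made up. It shows the same two steps as the patch: compute a goal physical block as group * blocks_per_group + start + first_data_block, then keep whichever candidate preallocation with enough free blocks lies closest to that goal.

#include <stdio.h>
#include <stdlib.h>

/* simplified stand-in for struct ext4_prealloc_space */
struct pa {
	unsigned long long pa_pstart;	/* physical start block of the PA */
	int pa_free;			/* free blocks left in the PA */
};

/* same idea as ext4_mb_check_group_pa(): return whichever of the
 * current candidate (cpa) and the new PA (pa) is closer to goal */
static struct pa *check_group_pa(unsigned long long goal,
				 struct pa *pa, struct pa *cpa)
{
	long long cur_distance, new_distance;

	if (cpa == NULL)
		return pa;
	cur_distance = llabs((long long)(goal - cpa->pa_pstart));
	new_distance = llabs((long long)(goal - pa->pa_pstart));

	return cur_distance < new_distance ? cpa : pa;
}

int main(void)
{
	/* goal = group * blocks_per_group + start + first_data_block,
	 * mirroring the goal_block computation in the hunk above */
	unsigned long long goal = 2ULL * 32768 + 100 + 1;	/* 65637 */

	struct pa list[] = { { 70000, 8 }, { 65700, 8 }, { 10000, 8 } };
	struct pa *cpa = NULL;
	int needed = 4;	/* blocks requested by the allocation */

	for (size_t i = 0; i < sizeof(list) / sizeof(list[0]); i++)
		if (list[i].pa_free >= needed)
			cpa = check_group_pa(goal, &list[i], cpa);

	/* picks the PA at 65700, only 63 blocks away from the goal */
	printf("picked PA at %llu for goal %llu\n", cpa->pa_pstart, goal);
	return 0;
}

Before this change, ext4_mb_use_preallocated() took the first locality-group preallocation with enough free space, which for small files could place blocks far from the inode's goal; scanning the whole list and keeping the nearest candidate is what reduces small-file fragmentation.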