Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 8a57d9d6 authored by Curt Wohlgemuth's avatar Curt Wohlgemuth Committed by Theodore Ts'o
Browse files

ext4: check for a good block group before loading buddy pages



This adds a new field in ext4_group_info to cache the largest available
block range in a block group, and defers loading the buddy pages until
*after* we've done a sanity check on the block group.

With large allocation requests (e.g., fallocate(), 8MiB) and relatively full
partitions, it's easy to have no block groups with a block extent large
enough to satisfy the input request length.  This currently causes the loop
during cr == 0 in ext4_mb_regular_allocator() to load the buddy bitmap pages
for EVERY block group.  That can be a lot of pages.  The patch below allows
us to call ext4_mb_good_group() BEFORE we load the buddy pages (although we
have to check again after we lock the block group).

Addresses-Google-Bug: #2578108
Addresses-Google-Bug: #2704453

Signed-off-by: default avatarCurt Wohlgemuth <curtw@google.com>
Signed-off-by: default avatar"Theodore Ts'o" <tytso@mit.edu>
parent 6d19c42b
Loading
Loading
Loading
Loading
+1 −0
Original line number Original line Diff line number Diff line
@@ -1678,6 +1678,7 @@ struct ext4_group_info {
	ext4_grpblk_t	bb_first_free;	/* first free block */
	ext4_grpblk_t	bb_first_free;	/* first free block */
	ext4_grpblk_t	bb_free;	/* total free blocks */
	ext4_grpblk_t	bb_free;	/* total free blocks */
	ext4_grpblk_t	bb_fragments;	/* nr of freespace fragments */
	ext4_grpblk_t	bb_fragments;	/* nr of freespace fragments */
	ext4_grpblk_t	bb_largest_free_order;/* order of largest frag in BG */
	struct          list_head bb_prealloc_list;
	struct          list_head bb_prealloc_list;
#ifdef DOUBLE_CHECK
#ifdef DOUBLE_CHECK
	void            *bb_bitmap;
	void            *bb_bitmap;
+57 −13
Original line number Original line Diff line number Diff line
@@ -658,6 +658,27 @@ static void ext4_mb_mark_free_simple(struct super_block *sb,
	}
	}
}
}


/*
 * Cache the order of the largest free extent we have available in this block
 * group.
 */
static void
mb_set_largest_free_order(struct super_block *sb, struct ext4_group_info *grp)
{
	int order;
	int max_order;

	/* Default to "uninitialized" until a non-empty order is found. */
	grp->bb_largest_free_order = -1;

	/*
	 * Walk the per-order free-extent counters from the highest possible
	 * order (blocksize_bits + 1) downwards; the first order with any
	 * free extents is, by construction, the largest one available.
	 */
	max_order = sb->s_blocksize_bits + 1;
	for (order = max_order; order >= 0; order--) {
		if (grp->bb_counters[order] > 0) {
			grp->bb_largest_free_order = order;
			break;
		}
	}
}

static noinline_for_stack
static noinline_for_stack
void ext4_mb_generate_buddy(struct super_block *sb,
void ext4_mb_generate_buddy(struct super_block *sb,
				void *buddy, void *bitmap, ext4_group_t group)
				void *buddy, void *bitmap, ext4_group_t group)
@@ -700,6 +721,7 @@ void ext4_mb_generate_buddy(struct super_block *sb,
		 */
		 */
		grp->bb_free = free;
		grp->bb_free = free;
	}
	}
	mb_set_largest_free_order(sb, grp);


	clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));
	clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));


@@ -725,6 +747,9 @@ void ext4_mb_generate_buddy(struct super_block *sb,
 * contain blocks_per_page (PAGE_CACHE_SIZE / blocksize)  blocks.
 * contain blocks_per_page (PAGE_CACHE_SIZE / blocksize)  blocks.
 * So it can have information regarding groups_per_page which
 * So it can have information regarding groups_per_page which
 * is blocks_per_page/2
 * is blocks_per_page/2
 *
 * Locking note:  This routine takes the block group lock of all groups
 * for this page; do not hold this lock when calling this routine!
 */
 */


static int ext4_mb_init_cache(struct page *page, char *incore)
static int ext4_mb_init_cache(struct page *page, char *incore)
@@ -910,6 +935,11 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
	return err;
	return err;
}
}


/*
 * Locking note:  This routine calls ext4_mb_init_cache(), which takes the
 * block group lock of all groups for this page; do not hold the BG lock when
 * calling this routine!
 */
static noinline_for_stack
static noinline_for_stack
int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
{
{
@@ -1004,6 +1034,11 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
	return ret;
	return ret;
}
}


/*
 * Locking note:  This routine calls ext4_mb_init_cache(), which takes the
 * block group lock of all groups for this page; do not hold the BG lock when
 * calling this routine!
 */
static noinline_for_stack int
static noinline_for_stack int
ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
					struct ext4_buddy *e4b)
					struct ext4_buddy *e4b)
@@ -1299,6 +1334,7 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
			buddy = buddy2;
			buddy = buddy2;
		} while (1);
		} while (1);
	}
	}
	mb_set_largest_free_order(sb, e4b->bd_info);
	mb_check_buddy(e4b);
	mb_check_buddy(e4b);
}
}


@@ -1427,6 +1463,7 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
		e4b->bd_info->bb_counters[ord]++;
		e4b->bd_info->bb_counters[ord]++;
		e4b->bd_info->bb_counters[ord]++;
		e4b->bd_info->bb_counters[ord]++;
	}
	}
	mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info);


	mb_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0);
	mb_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0);
	mb_check_buddy(e4b);
	mb_check_buddy(e4b);
@@ -1821,16 +1858,22 @@ void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
	}
	}
}
}


/* This is now called BEFORE we load the buddy bitmap. */
static int ext4_mb_good_group(struct ext4_allocation_context *ac,
static int ext4_mb_good_group(struct ext4_allocation_context *ac,
				ext4_group_t group, int cr)
				ext4_group_t group, int cr)
{
{
	unsigned free, fragments;
	unsigned free, fragments;
	unsigned i, bits;
	int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb));
	int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb));
	struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
	struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);


	BUG_ON(cr < 0 || cr >= 4);
	BUG_ON(cr < 0 || cr >= 4);
	BUG_ON(EXT4_MB_GRP_NEED_INIT(grp));

	/* We only do this if the grp has never been initialized */
	if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
		int ret = ext4_mb_init_group(ac->ac_sb, group);
		if (ret)
			return 0;
	}


	free = grp->bb_free;
	free = grp->bb_free;
	fragments = grp->bb_fragments;
	fragments = grp->bb_fragments;
@@ -1843,17 +1886,16 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
	case 0:
	case 0:
		BUG_ON(ac->ac_2order == 0);
		BUG_ON(ac->ac_2order == 0);


		if (grp->bb_largest_free_order < ac->ac_2order)
			return 0;

		/* Avoid using the first bg of a flexgroup for data files */
		/* Avoid using the first bg of a flexgroup for data files */
		if ((ac->ac_flags & EXT4_MB_HINT_DATA) &&
		if ((ac->ac_flags & EXT4_MB_HINT_DATA) &&
		    (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) &&
		    (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) &&
		    ((group % flex_size) == 0))
		    ((group % flex_size) == 0))
			return 0;
			return 0;


		bits = ac->ac_sb->s_blocksize_bits + 1;
		for (i = ac->ac_2order; i <= bits; i++)
			if (grp->bb_counters[i] > 0)
		return 1;
		return 1;
		break;
	case 1:
	case 1:
		if ((free / fragments) >= ac->ac_g_ex.fe_len)
		if ((free / fragments) >= ac->ac_g_ex.fe_len)
			return 1;
			return 1;
@@ -2024,14 +2066,11 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
		group = ac->ac_g_ex.fe_group;
		group = ac->ac_g_ex.fe_group;


		for (i = 0; i < ngroups; group++, i++) {
		for (i = 0; i < ngroups; group++, i++) {
			struct ext4_group_info *grp;

			if (group == ngroups)
			if (group == ngroups)
				group = 0;
				group = 0;


			/* quick check to skip empty groups */
			/* This now checks without needing the buddy page */
			grp = ext4_get_group_info(sb, group);
			if (!ext4_mb_good_group(ac, group, cr))
			if (grp->bb_free == 0)
				continue;
				continue;


			err = ext4_mb_load_buddy(sb, group, &e4b);
			err = ext4_mb_load_buddy(sb, group, &e4b);
@@ -2039,8 +2078,12 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
				goto out;
				goto out;


			ext4_lock_group(sb, group);
			ext4_lock_group(sb, group);

			/*
			 * We need to check again after locking the
			 * block group
			 */
			if (!ext4_mb_good_group(ac, group, cr)) {
			if (!ext4_mb_good_group(ac, group, cr)) {
				/* someone did allocation from this group */
				ext4_unlock_group(sb, group);
				ext4_unlock_group(sb, group);
				ext4_mb_unload_buddy(&e4b);
				ext4_mb_unload_buddy(&e4b);
				continue;
				continue;
@@ -2253,6 +2296,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
	INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
	INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
	init_rwsem(&meta_group_info[i]->alloc_sem);
	init_rwsem(&meta_group_info[i]->alloc_sem);
	meta_group_info[i]->bb_free_root = RB_ROOT;
	meta_group_info[i]->bb_free_root = RB_ROOT;
	meta_group_info[i]->bb_largest_free_order = -1;  /* uninit */


#ifdef DOUBLE_CHECK
#ifdef DOUBLE_CHECK
	{
	{