Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 32cfd5c3 authored by Zhang Yi, committed by Greg Kroah-Hartman
Browse files

ext4: make sure allocate pending entry not fail



[ Upstream commit 8e387c89e96b9543a339f84043cf9df15fed2632 ]

__insert_pending() allocates memory in an atomic context, so the allocation
could fail, but we are not handling that failure now. It could lead
ext4_es_remove_extent() to get a wrong reserved-clusters count, and the
global data blocks reservation count will be incorrect. The same applies
to the extents_status entry preallocation: preallocate the pending entry
outside of the i_es_lock with __GFP_NOFAIL, making sure __insert_pending()
and __revise_pending() always succeed.

Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
Cc: stable@kernel.org
Link: https://lore.kernel.org/r/20230824092619.1327976-3-yi.zhang@huaweicloud.com


Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Sasha Levin <sashal@kernel.org>
parent 70edeedd
Loading
Loading
Loading
Loading
+89 −34
Original line number Diff line number Diff line
@@ -152,8 +152,9 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan);
static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
		       struct ext4_inode_info *locked_ei);
static void __revise_pending(struct inode *inode, ext4_lblk_t lblk,
			     ext4_lblk_t len);
static int __revise_pending(struct inode *inode, ext4_lblk_t lblk,
			    ext4_lblk_t len,
			    struct pending_reservation **prealloc);

int __init ext4_init_es(void)
{
@@ -441,6 +442,19 @@ static void ext4_es_list_del(struct inode *inode)
	spin_unlock(&sbi->s_es_lock);
}

/*
 * Allocate a pending_reservation from the dedicated slab cache.
 *
 * With @nofail set, the allocation may sleep and is guaranteed to succeed
 * (GFP_KERNEL | __GFP_NOFAIL) and the entry is returned zeroed; otherwise
 * a non-sleeping GFP_ATOMIC attempt is made, which may return NULL.
 */
static inline struct pending_reservation *__alloc_pending(bool nofail)
{
	if (nofail)
		return kmem_cache_zalloc(ext4_pending_cachep,
					 GFP_KERNEL | __GFP_NOFAIL);

	return kmem_cache_alloc(ext4_pending_cachep, GFP_ATOMIC);
}

/* Release a pending_reservation previously obtained via __alloc_pending(). */
static inline void __free_pending(struct pending_reservation *pr)
{
	kmem_cache_free(ext4_pending_cachep, pr);
}

/*
 * Returns true if we cannot fail to allocate memory for this extent_status
 * entry and cannot reclaim it until its status changes.
@@ -832,11 +846,12 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
{
	struct extent_status newes;
	ext4_lblk_t end = lblk + len - 1;
	int err1 = 0;
	int err2 = 0;
	int err1 = 0, err2 = 0, err3 = 0;
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	struct extent_status *es1 = NULL;
	struct extent_status *es2 = NULL;
	struct pending_reservation *pr = NULL;
	bool revise_pending = false;

	es_debug("add [%u/%u) %llu %x to extent status tree of inode %lu\n",
		 lblk, len, pblk, status, inode->i_ino);
@@ -861,11 +876,17 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,

	ext4_es_insert_extent_check(inode, &newes);

	revise_pending = sbi->s_cluster_ratio > 1 &&
			 test_opt(inode->i_sb, DELALLOC) &&
			 (status & (EXTENT_STATUS_WRITTEN |
				    EXTENT_STATUS_UNWRITTEN));
retry:
	if (err1 && !es1)
		es1 = __es_alloc_extent(true);
	if ((err1 || err2) && !es2)
		es2 = __es_alloc_extent(true);
	if ((err1 || err2 || err3) && revise_pending && !pr)
		pr = __alloc_pending(true);
	write_lock(&EXT4_I(inode)->i_es_lock);

	err1 = __es_remove_extent(inode, lblk, end, NULL, es1);
@@ -890,13 +911,18 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
		es2 = NULL;
	}

	if (sbi->s_cluster_ratio > 1 && test_opt(inode->i_sb, DELALLOC) &&
	    (status & EXTENT_STATUS_WRITTEN ||
	     status & EXTENT_STATUS_UNWRITTEN))
		__revise_pending(inode, lblk, len);
	if (revise_pending) {
		err3 = __revise_pending(inode, lblk, len, &pr);
		if (err3 != 0)
			goto error;
		if (pr) {
			__free_pending(pr);
			pr = NULL;
		}
	}
error:
	write_unlock(&EXT4_I(inode)->i_es_lock);
	if (err1 || err2)
	if (err1 || err2 || err3)
		goto retry;

	ext4_es_print_tree(inode);
@@ -1298,7 +1324,7 @@ static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end,
				rc->ndelonly--;
				node = rb_next(&pr->rb_node);
				rb_erase(&pr->rb_node, &tree->root);
				kmem_cache_free(ext4_pending_cachep, pr);
				__free_pending(pr);
				if (!node)
					break;
				pr = rb_entry(node, struct pending_reservation,
@@ -1892,11 +1918,13 @@ static struct pending_reservation *__get_pending(struct inode *inode,
 *
 * @inode - file containing the cluster
 * @lblk - logical block in the cluster to be added
 * @prealloc - preallocated pending entry
 *
 * Returns 0 on successful insertion and -ENOMEM on failure.  If the
 * pending reservation is already in the set, returns successfully.
 */
static int __insert_pending(struct inode *inode, ext4_lblk_t lblk)
static int __insert_pending(struct inode *inode, ext4_lblk_t lblk,
			    struct pending_reservation **prealloc)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	struct ext4_pending_tree *tree = &EXT4_I(inode)->i_pending_tree;
@@ -1922,11 +1950,16 @@ static int __insert_pending(struct inode *inode, ext4_lblk_t lblk)
		}
	}

	pr = kmem_cache_alloc(ext4_pending_cachep, GFP_ATOMIC);
	if (pr == NULL) {
	if (likely(*prealloc == NULL)) {
		pr = __alloc_pending(false);
		if (!pr) {
			ret = -ENOMEM;
			goto out;
		}
	} else {
		pr = *prealloc;
		*prealloc = NULL;
	}
	pr->lclu = lclu;

	rb_link_node(&pr->rb_node, parent, p);
@@ -1955,7 +1988,7 @@ static void __remove_pending(struct inode *inode, ext4_lblk_t lblk)
	if (pr != NULL) {
		tree = &EXT4_I(inode)->i_pending_tree;
		rb_erase(&pr->rb_node, &tree->root);
		kmem_cache_free(ext4_pending_cachep, pr);
		__free_pending(pr);
	}
}

@@ -2016,10 +2049,10 @@ int ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk,
				 bool allocated)
{
	struct extent_status newes;
	int err1 = 0;
	int err2 = 0;
	int err1 = 0, err2 = 0, err3 = 0;
	struct extent_status *es1 = NULL;
	struct extent_status *es2 = NULL;
	struct pending_reservation *pr = NULL;

	es_debug("add [%u/1) delayed to extent status tree of inode %lu\n",
		 lblk, inode->i_ino);
@@ -2036,6 +2069,8 @@ int ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk,
		es1 = __es_alloc_extent(true);
	if ((err1 || err2) && !es2)
		es2 = __es_alloc_extent(true);
	if ((err1 || err2 || err3) && allocated && !pr)
		pr = __alloc_pending(true);
	write_lock(&EXT4_I(inode)->i_es_lock);

	err1 = __es_remove_extent(inode, lblk, lblk, NULL, es1);
@@ -2058,11 +2093,18 @@ int ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk,
		es2 = NULL;
	}

	if (allocated)
		__insert_pending(inode, lblk);
	if (allocated) {
		err3 = __insert_pending(inode, lblk, &pr);
		if (err3 != 0)
			goto error;
		if (pr) {
			__free_pending(pr);
			pr = NULL;
		}
	}
error:
	write_unlock(&EXT4_I(inode)->i_es_lock);
	if (err1 || err2)
	if (err1 || err2 || err3)
		goto retry;

	ext4_es_print_tree(inode);
@@ -2168,21 +2210,24 @@ unsigned int ext4_es_delayed_clu(struct inode *inode, ext4_lblk_t lblk,
 * @inode - file containing the range
 * @lblk - logical block defining the start of range
 * @len  - length of range in blocks
 * @prealloc - preallocated pending entry
 *
 * Used after a newly allocated extent is added to the extents status tree.
 * Requires that the extents in the range have either written or unwritten
 * status.  Must be called while holding i_es_lock.
 */
static void __revise_pending(struct inode *inode, ext4_lblk_t lblk,
			     ext4_lblk_t len)
static int __revise_pending(struct inode *inode, ext4_lblk_t lblk,
			    ext4_lblk_t len,
			    struct pending_reservation **prealloc)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	ext4_lblk_t end = lblk + len - 1;
	ext4_lblk_t first, last;
	bool f_del = false, l_del = false;
	int ret = 0;

	if (len == 0)
		return;
		return 0;

	/*
	 * Two cases - block range within single cluster and block range
@@ -2203,7 +2248,9 @@ static void __revise_pending(struct inode *inode, ext4_lblk_t lblk,
			f_del = __es_scan_range(inode, &ext4_es_is_delonly,
						first, lblk - 1);
		if (f_del) {
			__insert_pending(inode, first);
			ret = __insert_pending(inode, first, prealloc);
			if (ret < 0)
				goto out;
		} else {
			last = EXT4_LBLK_CMASK(sbi, end) +
			       sbi->s_cluster_ratio - 1;
@@ -2211,9 +2258,11 @@ static void __revise_pending(struct inode *inode, ext4_lblk_t lblk,
				l_del = __es_scan_range(inode,
							&ext4_es_is_delonly,
							end + 1, last);
			if (l_del)
				__insert_pending(inode, last);
			else
			if (l_del) {
				ret = __insert_pending(inode, last, prealloc);
				if (ret < 0)
					goto out;
			} else
				__remove_pending(inode, last);
		}
	} else {
@@ -2221,18 +2270,24 @@ static void __revise_pending(struct inode *inode, ext4_lblk_t lblk,
		if (first != lblk)
			f_del = __es_scan_range(inode, &ext4_es_is_delonly,
						first, lblk - 1);
		if (f_del)
			__insert_pending(inode, first);
		else
		if (f_del) {
			ret = __insert_pending(inode, first, prealloc);
			if (ret < 0)
				goto out;
		} else
			__remove_pending(inode, first);

		last = EXT4_LBLK_CMASK(sbi, end) + sbi->s_cluster_ratio - 1;
		if (last != end)
			l_del = __es_scan_range(inode, &ext4_es_is_delonly,
						end + 1, last);
		if (l_del)
			__insert_pending(inode, last);
		else
		if (l_del) {
			ret = __insert_pending(inode, last, prealloc);
			if (ret < 0)
				goto out;
		} else
			__remove_pending(inode, last);
	}
out:
	return ret;
}