ext4: Fix the race between read_inode_bitmap() and ext4_new_inode() (39341867) · Commits · e / devices / android_kernel_oneplus_sm7250

fs/ext4/ialloc.c

+86 −60

Original line number	Diff line number	Diff line
		@@ -572,6 +572,79 @@ static int find_group_other(struct super_block sb, struct inode parent,
		return -1;
		}

		/*
		* claim the inode from the inode bitmap. If the group
		* is uninit we need to take the groups's sb_bgl_lock
		* and clear the uninit flag. The inode bitmap update
		* and group desc uninit flag clear should be done
		* after holding sb_bgl_lock so that ext4_read_inode_bitmap
		* doesn't race with the ext4_claim_inode
		*/
		static int ext4_claim_inode(struct super_block *sb,
		struct buffer_head *inode_bitmap_bh,
		unsigned long ino, ext4_group_t group, int mode)
		{
		int free = 0, retval = 0, count;
		struct ext4_sb_info *sbi = EXT4_SB(sb);
		struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);

		spin_lock(sb_bgl_lock(sbi, group));
		if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) {
		/* not a free inode */
		retval = 1;
		goto err_ret;
		}
		ino++;
		if ((group == 0 && ino < EXT4_FIRST_INO(sb)) \|\|
		ino > EXT4_INODES_PER_GROUP(sb)) {
		spin_unlock(sb_bgl_lock(sbi, group));
		ext4_error(sb, __func__,
		"reserved inode or inode > inodes count - "
		"block_group = %u, inode=%lu", group,
		ino + group * EXT4_INODES_PER_GROUP(sb));
		return 1;
		}
		/* If we didn't allocate from within the initialized part of the inode
		* table then we need to initialize up to this inode. */
		if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {

		if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
		gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT);
		/* When marking the block group with
		* ~EXT4_BG_INODE_UNINIT we don't want to depend
		* on the value of bg_itable_unused even though
		* mke2fs could have initialized the same for us.
		* Instead we calculated the value below
		*/

		free = 0;
		} else {
		free = EXT4_INODES_PER_GROUP(sb) -
		ext4_itable_unused_count(sb, gdp);
		}

		/*
		* Check the relative inode number against the last used
		* relative inode number in this group. if it is greater
		* we need to update the bg_itable_unused count
		*
		*/
		if (ino > free)
		ext4_itable_unused_set(sb, gdp,
		(EXT4_INODES_PER_GROUP(sb) - ino));
		}
		count = ext4_free_inodes_count(sb, gdp) - 1;
		ext4_free_inodes_set(sb, gdp, count);
		if (S_ISDIR(mode)) {
		count = ext4_used_dirs_count(sb, gdp) + 1;
		ext4_used_dirs_set(sb, gdp, count);
		}
		gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
		err_ret:
		spin_unlock(sb_bgl_lock(sbi, group));
		return retval;
		}

		/*
		* There are two policies for allocating an inode. If the new inode is
		* a directory, then a forward search is made for a block group with both
		@@ -594,7 +667,7 @@ struct inode ext4_new_inode(handle_t handle, struct inode *dir, int mode)
		struct ext4_super_block *es;
		struct ext4_inode_info *ei;
		struct ext4_sb_info *sbi;
		int ret2, err = 0, count;
		int ret2, err = 0;
		struct inode *ret;
		ext4_group_t i;
		int free = 0;
		@@ -658,8 +731,13 @@ struct inode ext4_new_inode(handle_t handle, struct inode *dir, int mode)
		if (err)
		goto fail;

		if (!ext4_set_bit_atomic(sb_bgl_lock(sbi, group),
		ino, inode_bitmap_bh->b_data)) {
		BUFFER_TRACE(group_desc_bh, "get_write_access");
		err = ext4_journal_get_write_access(handle,
		group_desc_bh);
		if (err)
		goto fail;
		if (!ext4_claim_inode(sb, inode_bitmap_bh,
		ino, group, mode)) {
		/* we won it */
		BUFFER_TRACE(inode_bitmap_bh,
		"call ext4_handle_dirty_metadata");
		@@ -668,10 +746,13 @@ struct inode ext4_new_inode(handle_t handle, struct inode *dir, int mode)
		inode_bitmap_bh);
		if (err)
		goto fail;
		/* zero bit is inode number 1*/
		ino++;
		goto got;
		}
		/* we lost it */
		ext4_handle_release_buffer(handle, inode_bitmap_bh);
		ext4_handle_release_buffer(handle, group_desc_bh);

		if (++ino < EXT4_INODES_PER_GROUP(sb))
		goto repeat_in_this_group;
		@@ -691,22 +772,6 @@ struct inode ext4_new_inode(handle_t handle, struct inode *dir, int mode)
		goto out;

		got:
		ino++;
		if ((group == 0 && ino < EXT4_FIRST_INO(sb)) \|\|
		ino > EXT4_INODES_PER_GROUP(sb)) {
		ext4_error(sb, __func__,
		"reserved inode or inode > inodes count - "
		"block_group = %u, inode=%lu", group,
		ino + group * EXT4_INODES_PER_GROUP(sb));
		err = -EIO;
		goto fail;
		}

		BUFFER_TRACE(group_desc_bh, "get_write_access");
		err = ext4_journal_get_write_access(handle, group_desc_bh);
		if (err)
		goto fail;

		/* We may have to initialize the block bitmap if it isn't already */
		if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM) &&
		gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
		@@ -743,49 +808,10 @@ struct inode ext4_new_inode(handle_t handle, struct inode *dir, int mode)
		if (err)
		goto fail;
		}

		spin_lock(sb_bgl_lock(sbi, group));
		/* If we didn't allocate from within the initialized part of the inode
		* table then we need to initialize up to this inode. */
		if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
		if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
		gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT);

		/* When marking the block group with
		* ~EXT4_BG_INODE_UNINIT we don't want to depend
		* on the value of bg_itable_unused even though
		* mke2fs could have initialized the same for us.
		* Instead we calculated the value below
		*/

		free = 0;
		} else {
		free = EXT4_INODES_PER_GROUP(sb) -
		ext4_itable_unused_count(sb, gdp);
		}

		/*
		* Check the relative inode number against the last used
		* relative inode number in this group. if it is greater
		* we need to update the bg_itable_unused count
		*
		*/
		if (ino > free)
		ext4_itable_unused_set(sb, gdp,
		(EXT4_INODES_PER_GROUP(sb) - ino));
		}

		count = ext4_free_inodes_count(sb, gdp) - 1;
		ext4_free_inodes_set(sb, gdp, count);
		if (S_ISDIR(mode)) {
		count = ext4_used_dirs_count(sb, gdp) + 1;
		ext4_used_dirs_set(sb, gdp, count);
		}
		gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
		spin_unlock(sb_bgl_lock(sbi, group));
		BUFFER_TRACE(group_desc_bh, "call ext4_handle_dirty_metadata");
		err = ext4_handle_dirty_metadata(handle, NULL, group_desc_bh);
		if (err) goto fail;
		if (err)
		goto fail;

		percpu_counter_dec(&sbi->s_freeinodes_counter);
		if (S_ISDIR(mode))