Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 5d1b1b3f authored by Aneesh Kumar K.V's avatar Aneesh Kumar K.V Committed by Theodore Ts'o
Browse files

ext4: fix BUG when calling ext4_error with locked block group



The mballoc code likes to call ext4_error while it is holding locked
block groups.  This can cause a scheduling-in-atomic-context BUG.  We
can't just unlock the block group and relock it after/if ext4_error
returns since that might result in race conditions in the case where
the filesystem is set to continue after finding errors.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
parent b7be019e
Loading
Loading
Loading
Loading
+47 −0
Original line number Diff line number Diff line
@@ -1126,6 +1126,9 @@ extern void ext4_abort(struct super_block *, const char *, const char *, ...)
	__attribute__ ((format (printf, 3, 4)));
extern void ext4_warning(struct super_block *, const char *, const char *, ...)
	__attribute__ ((format (printf, 3, 4)));
/*
 * Error-reporting variant for callers that hold the lock of the block
 * group passed as the second argument.  The third argument is the name
 * of the calling function; the fourth is a printf-style format string
 * followed by its arguments.
 */
extern void ext4_grp_locked_error(struct super_block *, ext4_group_t,
				const char *, const char *, ...)
	__attribute__ ((format (printf, 4, 5)));
extern void ext4_update_dynamic_rev(struct super_block *sb);
extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb,
					__u32 compat);
@@ -1249,6 +1252,50 @@ static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
	return ;
}

/*
 * Per-block-group bookkeeping used by the multiblock allocator.
 */
struct ext4_group_info {
	/*
	 * Flag word addressed by bit number (EXT4_GROUP_INFO_*_BIT):
	 * the NEED_INIT bit is tested by EXT4_MB_GRP_NEED_INIT() and the
	 * LOCKED bit is used as a bit spinlock by ext4_lock_group().
	 */
	unsigned long   bb_state;
	/* Root of a red-black tree; NOTE(review): contents not visible here */
	struct rb_root  bb_free_root;
	unsigned short  bb_first_free;
	/* Free-block count; checked against the bitmap's free count */
	unsigned short  bb_free;
	/* Fragment count (presumably free extents -- TODO confirm) */
	unsigned short  bb_fragments;
	struct          list_head bb_prealloc_list;
#ifdef DOUBLE_CHECK
	/* Only present when DOUBLE_CHECK is defined */
	void            *bb_bitmap;
#endif
	struct rw_semaphore alloc_sem;
	/* Flexible array member; presumably one counter per order -- confirm */
	unsigned short  bb_counters[];
};

/* Bit numbers within the bb_state word of struct ext4_group_info. */
#define EXT4_GROUP_INFO_NEED_INIT_BIT	0
/* Used as a bit spinlock by ext4_lock_group()/ext4_unlock_group(). */
#define EXT4_GROUP_INFO_LOCKED_BIT	1

/* Non-zero when the NEED_INIT bit is set in grp->bb_state. */
#define EXT4_MB_GRP_NEED_INIT(grp)	\
	(test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))

/*
 * Acquire the per-group bit spinlock, i.e. EXT4_GROUP_INFO_LOCKED_BIT in
 * the bb_state word of the group info looked up for (sb, group).
 */
static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
{
	unsigned long *state_word = &ext4_get_group_info(sb, group)->bb_state;

	bit_spin_lock(EXT4_GROUP_INFO_LOCKED_BIT, state_word);
}

/*
 * Release the per-group bit spinlock, i.e. EXT4_GROUP_INFO_LOCKED_BIT in
 * the bb_state word of the group info looked up for (sb, group).
 */
static inline void ext4_unlock_group(struct super_block *sb,
					ext4_group_t group)
{
	unsigned long *state_word = &ext4_get_group_info(sb, group)->bb_state;

	bit_spin_unlock(EXT4_GROUP_INFO_LOCKED_BIT, state_word);
}

/*
 * Query the per-group bit spinlock.  Returns whatever
 * bit_spin_is_locked() reports for EXT4_GROUP_INFO_LOCKED_BIT in the
 * bb_state word of the group info looked up for (sb, group).
 */
static inline int ext4_is_group_locked(struct super_block *sb,
					ext4_group_t group)
{
	unsigned long *state_word = &ext4_get_group_info(sb, group)->bb_state;

	return bit_spin_is_locked(EXT4_GROUP_INFO_LOCKED_BIT, state_word);
}

/*
 * Inodes and files operations
 */
+15 −15
Original line number Diff line number Diff line
@@ -457,8 +457,8 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
			blocknr += first + i;
			blocknr +=
			    le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);

			ext4_error(sb, __func__, "double-free of inode"
			ext4_grp_locked_error(sb, e4b->bd_group,
				   __func__, "double-free of inode"
				   " %lu's block %llu(bit %u in group %u)",
				   inode ? inode->i_ino : 0, blocknr,
				   first + i, e4b->bd_group);
@@ -702,7 +702,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb,
	grp->bb_fragments = fragments;

	if (free != grp->bb_free) {
		ext4_error(sb, __func__,
		ext4_grp_locked_error(sb, group,  __func__,
			"EXT4-fs: group %u: %u blocks in bitmap, %u in gd",
			group, free, grp->bb_free);
		/*
@@ -1095,8 +1095,6 @@ static void mb_set_bits(spinlock_t *lock, void *bm, int cur, int len)

static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
			  int first, int count)
__releases(bitlock)
__acquires(bitlock)
{
	int block = 0;
	int max = 0;
@@ -1135,12 +1133,11 @@ __acquires(bitlock)
			blocknr += block;
			blocknr +=
			    le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
			ext4_unlock_group(sb, e4b->bd_group);
			ext4_error(sb, __func__, "double-free of inode"
			ext4_grp_locked_error(sb, e4b->bd_group,
				   __func__, "double-free of inode"
				   " %lu's block %llu(bit %u in group %u)",
				   inode ? inode->i_ino : 0, blocknr, block,
				   e4b->bd_group);
			ext4_lock_group(sb, e4b->bd_group);
		}
		mb_clear_bit(block, EXT4_MB_BITMAP(e4b));
		e4b->bd_info->bb_counters[order]++;
@@ -1623,7 +1620,8 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
			 * free blocks even though group info says we
			 * we have free blocks
			 */
			ext4_error(sb, __func__, "%d free blocks as per "
			ext4_grp_locked_error(sb, e4b->bd_group,
					__func__, "%d free blocks as per "
					"group info. But bitmap says 0",
					free);
			break;
@@ -1632,7 +1630,8 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
		mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex);
		BUG_ON(ex.fe_len <= 0);
		if (free < ex.fe_len) {
			ext4_error(sb, __func__, "%d free blocks as per "
			ext4_grp_locked_error(sb, e4b->bd_group,
					__func__, "%d free blocks as per "
					"group info. But got %d blocks",
					free, ex.fe_len);
			/*
@@ -3822,7 +3821,8 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
			pa, (unsigned long) pa->pa_lstart,
			(unsigned long) pa->pa_pstart,
			(unsigned long) pa->pa_len);
		ext4_error(sb, __func__, "free %u, pa_free %u",
		ext4_grp_locked_error(sb, group,
					__func__, "free %u, pa_free %u",
					free, pa->pa_free);
		/*
		 * pa is already deleted so we use the value obtained
@@ -4633,7 +4633,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
		else if (block >= (entry->start_blk + entry->count))
			n = &(*n)->rb_right;
		else {
			ext4_error(sb, __func__,
			ext4_grp_locked_error(sb, e4b->bd_group, __func__,
					"Double free of blocks %d (%d %d)",
					block, entry->start_blk, entry->count);
			return 0;
+0 −47
Original line number Diff line number Diff line
@@ -118,27 +118,6 @@ struct ext4_free_data {
	tid_t	t_tid;
};

/*
 * Per-block-group bookkeeping used by the multiblock allocator.
 */
struct ext4_group_info {
	/*
	 * Flag word addressed by bit number (EXT4_GROUP_INFO_*_BIT):
	 * the NEED_INIT bit is tested by EXT4_MB_GRP_NEED_INIT() and the
	 * LOCKED bit is used as a bit spinlock by ext4_lock_group().
	 */
	unsigned long	bb_state;
	/* Root of a red-black tree; NOTE(review): contents not visible here */
	struct rb_root  bb_free_root;
	unsigned short	bb_first_free;
	/* Free-block count; checked against the bitmap's free count */
	unsigned short	bb_free;
	/* Fragment count (presumably free extents -- TODO confirm) */
	unsigned short	bb_fragments;
	struct		list_head bb_prealloc_list;
#ifdef DOUBLE_CHECK
	/* Only present when DOUBLE_CHECK is defined */
	void		*bb_bitmap;
#endif
	struct rw_semaphore alloc_sem;
	/* Flexible array member; presumably one counter per order -- confirm */
	unsigned short	bb_counters[];
};

/* Bit numbers within the bb_state word of struct ext4_group_info. */
#define EXT4_GROUP_INFO_NEED_INIT_BIT	0
/* Used as a bit spinlock by ext4_lock_group()/ext4_unlock_group(). */
#define EXT4_GROUP_INFO_LOCKED_BIT	1

/* Non-zero when the NEED_INIT bit is set in grp->bb_state. */
#define EXT4_MB_GRP_NEED_INIT(grp)	\
	(test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))


struct ext4_prealloc_space {
	struct list_head	pa_inode_list;
	struct list_head	pa_group_list;
@@ -264,32 +243,6 @@ static inline void ext4_mb_store_history(struct ext4_allocation_context *ac)
#define in_range(b, first, len)	((b) >= (first) && (b) <= (first) + (len) - 1)

struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t);


/*
 * Acquire the per-group bit spinlock, i.e. EXT4_GROUP_INFO_LOCKED_BIT in
 * the bb_state word of the group info looked up for (sb, group).
 */
static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
{
	unsigned long *state_word = &ext4_get_group_info(sb, group)->bb_state;

	bit_spin_lock(EXT4_GROUP_INFO_LOCKED_BIT, state_word);
}

/*
 * Release the per-group bit spinlock, i.e. EXT4_GROUP_INFO_LOCKED_BIT in
 * the bb_state word of the group info looked up for (sb, group).
 */
static inline void ext4_unlock_group(struct super_block *sb,
					ext4_group_t group)
{
	unsigned long *state_word = &ext4_get_group_info(sb, group)->bb_state;

	bit_spin_unlock(EXT4_GROUP_INFO_LOCKED_BIT, state_word);
}

/*
 * Query the per-group bit spinlock.  Returns whatever
 * bit_spin_is_locked() reports for EXT4_GROUP_INFO_LOCKED_BIT in the
 * bb_state word of the group info looked up for (sb, group).
 */
static inline int ext4_is_group_locked(struct super_block *sb,
					ext4_group_t group)
{
	unsigned long *state_word = &ext4_get_group_info(sb, group)->bb_state;

	return bit_spin_is_locked(EXT4_GROUP_INFO_LOCKED_BIT, state_word);
}

static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
					struct ext4_free_extent *fex)
{
+43 −2
Original line number Diff line number Diff line
@@ -366,6 +366,44 @@ void ext4_warning(struct super_block *sb, const char *function,
	va_end(args);
}

/*
 * Report a filesystem error on behalf of a caller that holds the bit
 * spinlock of block group @grp (see ext4_lock_group()).
 *
 * @sb:       superblock of the affected filesystem
 * @grp:      block group whose lock the caller holds
 * @function: name of the reporting function, printed in the message
 * @fmt:      printf-style format for the message, followed by its arguments
 *
 * The message is always printed at KERN_CRIT.  After that:
 *  - if the ERRORS_CONT mount option is set, EXT4_ERROR_FS is set in
 *    s_mount_state and in the superblock's s_state, ext4_commit_super()
 *    is called with 0 as its last argument, and the function returns
 *    without ever dropping the group lock;
 *  - otherwise the group lock is dropped around ext4_handle_error() and
 *    taken again before returning.
 *
 * Either way the function returns with the group lock held.
 */
void ext4_grp_locked_error(struct super_block *sb, ext4_group_t grp,
				const char *function, const char *fmt, ...)
__releases(bitlock)
__acquires(bitlock)
{
	va_list args;
	struct ext4_super_block *es = EXT4_SB(sb)->s_es;

	va_start(args, fmt);
	printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
	vprintk(fmt, args);
	printk("\n");
	va_end(args);

	if (test_opt(sb, ERRORS_CONT)) {
		/* Record the error and keep going; the lock stays held. */
		EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
		es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
		ext4_commit_super(sb, es, 0);
		return;
	}
	/* Drop the group lock before calling the generic error handler. */
	ext4_unlock_group(sb, grp);
	ext4_handle_error(sb);
	/*
	 * We only get here in the ERRORS_RO case; relocking the group
	 * may be dangerous, but nothing bad will happen since the
	 * filesystem will have already been marked read/only and the
	 * journal has been aborted.
	 *
	 * NOTE(review): this function is void, so callers currently
	 * cannot distinguish between the ERRORS_CONT and ERRORS_RO
	 * cases.  Returning a hint value would let callers that want
	 * to do so return more aggressively from the ext4 function in
	 * question, with a more appropriate error code.
	 */
	ext4_lock_group(sb, grp);
	return;
}


void ext4_update_dynamic_rev(struct super_block *sb)
{
	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
@@ -2868,8 +2906,11 @@ static void ext4_commit_super(struct super_block *sb,
		set_buffer_uptodate(sbh);
	}
	es->s_wtime = cpu_to_le32(get_seconds());
	ext4_free_blocks_count_set(es, ext4_count_free_blocks(sb));
	es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb));
	ext4_free_blocks_count_set(es, percpu_counter_sum_positive(
					&EXT4_SB(sb)->s_freeblocks_counter));
	es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive(
					&EXT4_SB(sb)->s_freeinodes_counter));

	BUFFER_TRACE(sbh, "marking dirty");
	mark_buffer_dirty(sbh);
	if (sync) {