Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit ae67d9a8 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull ext4 updates from Ted Ts'o:
 "New features for 3.12:

   - Added aggressive extent caching using the extent status tree.  This
     can actually decrease memory usage in read-mostly workloads since
     the information is much more compactly stored in the extent status
     tree than if we had to keep the extent tree metadata blocks in the
     buffer cache.  This also improves Asynchronous I/O since it is it
     makes much less likely that we need to do metadata I/O to lookup
     the extent tree information.

   - Improve the recovery after corrupted allocation bitmaps are found
     when running in errors=ignore mode.

  Also fixed some writeback vs truncate races when using a blocksize
  less than the page size"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (25 commits)
  ext4: allow specifying external journal by pathname mount option
  ext4: mark group corrupt on group descriptor checksum
  ext4: mark block group as corrupt on inode bitmap error
  ext4: mark block group as corrupt on block bitmap error
  ext4: fix type declaration of ext4_validate_block_bitmap
  ext4: error out if verifying the block bitmap fails
  jbd2: Fix endian mixing problems in the checksumming code
  ext4: isolate ext4_extents.h file
  ext4: Fix misspellings using 'codespell' tool
  ext4: convert write_begin methods to stable_page_writes semantics
  ext4: fix use of potentially uninitialized variables in debugging code
  ext4: fix lost truncate due to race with writeback
  ext4: simplify truncation code in ext4_setattr()
  ext4: fix ext4_writepages() in presence of truncate
  ext4: move test whether extent to map can be extended to one place
  ext4: fix warning in ext4_da_update_reserve_space()
  quota: provide interface for readding allocated space into reserved space
  ext4: avoid reusing recently deleted inodes in no journal mode
  ext4: allocate delayed allocation blocks before rename
  ext4: start handle at least possible moment when renaming files
  ...
parents 71c7356f ad4eec61
Loading
Loading
Loading
Loading
+4 −3
Original line number Diff line number Diff line
@@ -144,11 +144,12 @@ journal_async_commit Commit block can be written to disk without waiting
			mount the device. This will enable 'journal_checksum'
			internally.

journal_path=path
journal_dev=devnum	When the external journal device's major/minor numbers
			have changed, this option allows the user to specify
			have changed, these options allow the user to specify
			the new journal location.  The journal device is
			identified through its new major/minor numbers encoded
			in devnum.
			identified through either its new major/minor numbers
			encoded in devnum, or via a path to the device.

norecovery		Don't load the journal on mounting.  Note that
noload			if the filesystem was not unmounted cleanly,
+1 −1
Original line number Diff line number Diff line
@@ -41,7 +41,7 @@ static unsigned char get_dtype(struct super_block *sb, int filetype)

/**
 * Check if the given dir-inode refers to an htree-indexed directory
 * (or a directory which chould potentially get coverted to use htree
 * (or a directory which could potentially get converted to use htree
 * indexing).
 *
 * Return 1 if it is a dx dir, 0 if not
+15 −9
Original line number Diff line number Diff line
@@ -184,6 +184,7 @@ void ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	ext4_fsblk_t start, tmp;
	int flex_bg = 0;
	struct ext4_group_info *grp;

	J_ASSERT_BH(bh, buffer_locked(bh));

@@ -191,11 +192,9 @@ void ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
	 * essentially implementing a per-group read-only flag. */
	if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
		ext4_error(sb, "Checksum bad for group %u", block_group);
		ext4_free_group_clusters_set(sb, gdp, 0);
		ext4_free_inodes_set(sb, gdp, 0);
		ext4_itable_unused_set(sb, gdp, 0);
		memset(bh->b_data, 0xff, sb->s_blocksize);
		ext4_block_bitmap_csum_set(sb, block_group, gdp, bh);
		grp = ext4_get_group_info(sb, block_group);
		set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
		set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
		return;
	}
	memset(bh->b_data, 0, sb->s_blocksize);
@@ -305,7 +304,7 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
 */
static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb,
					    struct ext4_group_desc *desc,
					    unsigned int block_group,
					    ext4_group_t block_group,
					    struct buffer_head *bh)
{
	ext4_grpblk_t offset;
@@ -352,10 +351,11 @@ static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb,

void ext4_validate_block_bitmap(struct super_block *sb,
			       struct ext4_group_desc *desc,
			       unsigned int block_group,
			       ext4_group_t block_group,
			       struct buffer_head *bh)
{
	ext4_fsblk_t	blk;
	struct ext4_group_info *grp = ext4_get_group_info(sb, block_group);

	if (buffer_verified(bh))
		return;
@@ -366,12 +366,14 @@ void ext4_validate_block_bitmap(struct super_block *sb,
		ext4_unlock_group(sb, block_group);
		ext4_error(sb, "bg %u: block %llu: invalid block bitmap",
			   block_group, blk);
		set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
		return;
	}
	if (unlikely(!ext4_block_bitmap_csum_verify(sb, block_group,
			desc, bh))) {
		ext4_unlock_group(sb, block_group);
		ext4_error(sb, "bg %u: bad block bitmap checksum", block_group);
		set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
		return;
	}
	set_buffer_verified(bh);
@@ -445,7 +447,10 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
	return bh;
verify:
	ext4_validate_block_bitmap(sb, desc, block_group, bh);
	if (buffer_verified(bh))
		return bh;
	put_bh(bh);
	return NULL;
}

/* Returns 0 on success, 1 on error */
@@ -469,7 +474,8 @@ int ext4_wait_block_bitmap(struct super_block *sb, ext4_group_t block_group,
	clear_buffer_new(bh);
	/* Panic or remount fs read-only if block bitmap is invalid */
	ext4_validate_block_bitmap(sb, desc, block_group, bh);
	return 0;
	/* ...but check for error just in case errors=continue. */
	return !buffer_verified(bh);
}

struct buffer_head *
+1 −1
Original line number Diff line number Diff line
@@ -33,7 +33,7 @@ static int ext4_dx_readdir(struct file *, struct dir_context *);

/**
 * Check if the given dir-inode refers to an htree-indexed directory
 * (or a directory which chould potentially get coverted to use htree
 * (or a directory which could potentially get converted to use htree
 * indexing).
 *
 * Return 1 if it is a dx dir, 0 if not
+51 −7
Original line number Diff line number Diff line
@@ -560,6 +560,18 @@ enum {
	/* Do not put hole in extent cache */
#define EXT4_GET_BLOCKS_NO_PUT_HOLE		0x0200

/*
 * The bit position of these flags must not overlap with any of the
 * EXT4_GET_BLOCKS_*.  They are used by ext4_ext_find_extent(),
 * read_extent_tree_block(), ext4_split_extent_at(),
 * ext4_ext_insert_extent(), and ext4_ext_create_new_leaf().
 * EXT4_EX_NOCACHE is used to indicate that the we shouldn't be
 * caching the extents when reading from the extent tree while a
 * truncate or punch hole operation is in progress.
 */
#define EXT4_EX_NOCACHE				0x0400
#define EXT4_EX_FORCE_CACHE			0x0800

/*
 * Flags used by ext4_free_blocks
 */
@@ -569,6 +581,7 @@ enum {
#define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE	0x0008
#define EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER	0x0010
#define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER	0x0020
#define EXT4_FREE_BLOCKS_RESERVE		0x0040

/*
 * ioctl commands
@@ -590,6 +603,7 @@ enum {
#define EXT4_IOC_MOVE_EXT		_IOWR('f', 15, struct move_extent)
#define EXT4_IOC_RESIZE_FS		_IOW('f', 16, __u64)
#define EXT4_IOC_SWAP_BOOT		_IO('f', 17)
#define EXT4_IOC_PRECACHE_EXTENTS	_IO('f', 18)

#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
/*
@@ -1375,6 +1389,7 @@ enum {
					   nolocking */
	EXT4_STATE_MAY_INLINE_DATA,	/* may have in-inode data */
	EXT4_STATE_ORDERED_MODE,	/* data=ordered mode */
	EXT4_STATE_EXT_PRECACHED,	/* extents have been precached */
};

#define EXT4_INODE_BIT_FNS(name, field, offset)				\
@@ -1915,7 +1930,7 @@ extern ext4_group_t ext4_get_group_number(struct super_block *sb,

extern void ext4_validate_block_bitmap(struct super_block *sb,
				       struct ext4_group_desc *desc,
				       unsigned int block_group,
				       ext4_group_t block_group,
				       struct buffer_head *bh);
extern unsigned int ext4_block_group(struct super_block *sb,
			ext4_fsblk_t blocknr);
@@ -2417,16 +2432,32 @@ do { \
#define EXT4_FREECLUSTERS_WATERMARK 0
#endif

/* Update i_disksize. Requires i_mutex to avoid races with truncate */
static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
{
	WARN_ON_ONCE(S_ISREG(inode->i_mode) &&
		     !mutex_is_locked(&inode->i_mutex));
	down_write(&EXT4_I(inode)->i_data_sem);
	if (newsize > EXT4_I(inode)->i_disksize)
		EXT4_I(inode)->i_disksize = newsize;
	up_write(&EXT4_I(inode)->i_data_sem);
}

/*
	 * XXX: replace with spinlock if seen contended -bzzz
 * Update i_disksize after writeback has been started. Races with truncate
 * are avoided by checking i_size under i_data_sem.
 */
static inline void ext4_wb_update_i_disksize(struct inode *inode, loff_t newsize)
{
	loff_t i_size;

	down_write(&EXT4_I(inode)->i_data_sem);
	i_size = i_size_read(inode);
	if (newsize > i_size)
		newsize = i_size;
	if (newsize > EXT4_I(inode)->i_disksize)
		EXT4_I(inode)->i_disksize = newsize;
	up_write(&EXT4_I(inode)->i_data_sem);
	return ;
}

struct ext4_group_info {
@@ -2449,9 +2480,15 @@ struct ext4_group_info {

#define EXT4_GROUP_INFO_NEED_INIT_BIT		0
#define EXT4_GROUP_INFO_WAS_TRIMMED_BIT		1
#define EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT	2
#define EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT	3

#define EXT4_MB_GRP_NEED_INIT(grp)	\
	(test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
#define EXT4_MB_GRP_BBITMAP_CORRUPT(grp)	\
	(test_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &((grp)->bb_state)))
#define EXT4_MB_GRP_IBITMAP_CORRUPT(grp)	\
	(test_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &((grp)->bb_state)))

#define EXT4_MB_GRP_WAS_TRIMMED(grp)	\
	(test_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state)))
@@ -2655,6 +2692,12 @@ extern int ext4_check_blockref(const char *, unsigned int,
struct ext4_ext_path;
struct ext4_extent;

/*
 * Maximum number of logical blocks in a file; ext4_extent's ee_block is
 * __le32.
 */
#define EXT_MAX_BLOCKS	0xffffffff

extern int ext4_ext_tree_init(handle_t *handle, struct inode *);
extern int ext4_ext_writepage_trans_blocks(struct inode *, int);
extern int ext4_ext_index_trans_blocks(struct inode *inode, int extents);
@@ -2684,7 +2727,8 @@ extern int ext4_ext_insert_extent(handle_t *, struct inode *,
				  struct ext4_ext_path *,
				  struct ext4_extent *, int);
extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t,
						  struct ext4_ext_path *);
						  struct ext4_ext_path *,
						  int flags);
extern void ext4_ext_drop_refs(struct ext4_ext_path *);
extern int ext4_ext_check_inode(struct inode *inode);
extern int ext4_find_delalloc_range(struct inode *inode,
@@ -2693,7 +2737,7 @@ extern int ext4_find_delalloc_range(struct inode *inode,
extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk);
extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
			__u64 start, __u64 len);

extern int ext4_ext_precache(struct inode *inode);

/* move_extent.c */
extern void ext4_double_down_write_data_sem(struct inode *first,
Loading