Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit c2661b80 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull ext4 updates from Ted Ts'o:
 "A large number of cleanups and bug fixes, with some (minor) journal
  optimizations"

[ This got sent to me before -rc1, but was stuck in my spam folder.   - Linus ]

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (67 commits)
  ext4: check s_chksum_driver when looking for bg csum presence
  ext4: move error report out of atomic context in ext4_init_block_bitmap()
  ext4: Replace open coded mdata csum feature to helper function
  ext4: delete useless comments about ext4_move_extents
  ext4: fix reservation overflow in ext4_da_write_begin
  ext4: add ext4_iget_normal() which is to be used for dir tree lookups
  ext4: don't orphan or truncate the boot loader inode
  ext4: grab missed write_count for EXT4_IOC_SWAP_BOOT
  ext4: optimize block allocation on grow indepth
  ext4: get rid of code duplication
  ext4: fix over-defensive complaint after journal abort
  ext4: fix return value of ext4_do_update_inode
  ext4: fix mmap data corruption when blocksize < pagesize
  vfs: fix data corruption when blocksize < pagesize for mmaped data
  ext4: fold ext4_nojournal_sops into ext4_sops
  ext4: support freezing ext2 (nojournal) file systems
  ext4: fold ext4_sync_fs_nojournal() into ext4_sync_fs()
  ext4: don't check quota format when there are no quota files
  jbd2: simplify calling convention around __jbd2_journal_clean_checkpoint_list
  jbd2: avoid pointless scanning of checkpoint lists
  ...
parents f114040e 813d32f9
Loading
Loading
Loading
Loading
+29 −19
Original line number Diff line number Diff line
@@ -993,7 +993,7 @@ init_page_buffers(struct page *page, struct block_device *bdev,
 */
static int
grow_dev_page(struct block_device *bdev, sector_t block,
		pgoff_t index, int size, int sizebits)
	      pgoff_t index, int size, int sizebits, gfp_t gfp)
{
	struct inode *inode = bdev->bd_inode;
	struct page *page;
@@ -1002,8 +1002,8 @@ grow_dev_page(struct block_device *bdev, sector_t block,
	int ret = 0;		/* Will call free_more_memory() */
	gfp_t gfp_mask;

	gfp_mask = mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS;
	gfp_mask |= __GFP_MOVABLE;
	gfp_mask = (mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS) | gfp;

	/*
	 * XXX: __getblk_slow() can not really deal with failure and
	 * will endlessly loop on improvised global reclaim.  Prefer
@@ -1060,7 +1060,7 @@ grow_dev_page(struct block_device *bdev, sector_t block,
 * that page was dirty, the buffers are set dirty also.
 */
static int
grow_buffers(struct block_device *bdev, sector_t block, int size)
grow_buffers(struct block_device *bdev, sector_t block, int size, gfp_t gfp)
{
	pgoff_t index;
	int sizebits;
@@ -1087,11 +1087,12 @@ grow_buffers(struct block_device *bdev, sector_t block, int size)
	}

	/* Create a page with the proper size buffers.. */
	return grow_dev_page(bdev, block, index, size, sizebits);
	return grow_dev_page(bdev, block, index, size, sizebits, gfp);
}

static struct buffer_head *
__getblk_slow(struct block_device *bdev, sector_t block, int size)
struct buffer_head *
__getblk_slow(struct block_device *bdev, sector_t block,
	     unsigned size, gfp_t gfp)
{
	/* Size must be multiple of hard sectorsize */
	if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
@@ -1113,13 +1114,14 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size)
		if (bh)
			return bh;

		ret = grow_buffers(bdev, block, size);
		ret = grow_buffers(bdev, block, size, gfp);
		if (ret < 0)
			return NULL;
		if (ret == 0)
			free_more_memory();
	}
}
EXPORT_SYMBOL(__getblk_slow);

/*
 * The relationship between dirty buffers and dirty pages:
@@ -1373,24 +1375,25 @@ __find_get_block(struct block_device *bdev, sector_t block, unsigned size)
EXPORT_SYMBOL(__find_get_block);

/*
 * __getblk will locate (and, if necessary, create) the buffer_head
 * __getblk_gfp() will locate (and, if necessary, create) the buffer_head
 * which corresponds to the passed block_device, block and size. The
 * returned buffer has its reference count incremented.
 *
 * __getblk() will lock up the machine if grow_dev_page's try_to_free_buffers()
 * attempt is failing.  FIXME, perhaps?
 * __getblk_gfp() will lock up the machine if grow_dev_page's
 * try_to_free_buffers() attempt is failing.  FIXME, perhaps?
 */
struct buffer_head *
__getblk(struct block_device *bdev, sector_t block, unsigned size)
__getblk_gfp(struct block_device *bdev, sector_t block,
	     unsigned size, gfp_t gfp)
{
	struct buffer_head *bh = __find_get_block(bdev, block, size);

	might_sleep();
	if (bh == NULL)
		bh = __getblk_slow(bdev, block, size);
		bh = __getblk_slow(bdev, block, size, gfp);
	return bh;
}
EXPORT_SYMBOL(__getblk);
EXPORT_SYMBOL(__getblk_gfp);

/*
 * Do async read-ahead on a buffer..
@@ -1406,24 +1409,28 @@ void __breadahead(struct block_device *bdev, sector_t block, unsigned size)
EXPORT_SYMBOL(__breadahead);

/**
 *  __bread() - reads a specified block and returns the bh
 *  __bread_gfp() - reads a specified block and returns the bh
 *  @bdev: the block_device to read from
 *  @block: number of block
 *  @size: size (in bytes) to read
 *  @gfp: page allocation flag
 *
 *  Reads a specified block, and returns buffer head that contains it.
 *  The page cache can be allocated from non-movable area
 *  not to prevent page migration if you set gfp to zero.
 *  It returns NULL if the block was unreadable.
 */
struct buffer_head *
__bread(struct block_device *bdev, sector_t block, unsigned size)
__bread_gfp(struct block_device *bdev, sector_t block,
		   unsigned size, gfp_t gfp)
{
	struct buffer_head *bh = __getblk(bdev, block, size);
	struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp);

	if (likely(bh) && !buffer_uptodate(bh))
		bh = __bread_slow(bh);
	return bh;
}
EXPORT_SYMBOL(__bread);
EXPORT_SYMBOL(__bread_gfp);

/*
 * invalidate_bh_lrus() is called rarely - but not only at unmount.
@@ -2082,6 +2089,7 @@ int generic_write_end(struct file *file, struct address_space *mapping,
			struct page *page, void *fsdata)
{
	struct inode *inode = mapping->host;
	loff_t old_size = inode->i_size;
	int i_size_changed = 0;

	copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
@@ -2101,6 +2109,8 @@ int generic_write_end(struct file *file, struct address_space *mapping,
	unlock_page(page);
	page_cache_release(page);

	if (old_size < pos)
		pagecache_isize_extended(inode, old_size, pos);
	/*
	 * Don't mark the inode dirty under page lock. First, it unnecessarily
	 * makes the holding time of page lock longer. Second, it forces lock
+9 −6
Original line number Diff line number Diff line
@@ -176,7 +176,7 @@ static unsigned int num_clusters_in_group(struct super_block *sb,
}

/* Initializes an uninitialized block bitmap */
static void ext4_init_block_bitmap(struct super_block *sb,
static int ext4_init_block_bitmap(struct super_block *sb,
				   struct buffer_head *bh,
				   ext4_group_t block_group,
				   struct ext4_group_desc *gdp)
@@ -192,7 +192,6 @@ static void ext4_init_block_bitmap(struct super_block *sb,
	/* If checksum is bad mark all blocks used to prevent allocation
	 * essentially implementing a per-group read-only flag. */
	if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
		ext4_error(sb, "Checksum bad for group %u", block_group);
		grp = ext4_get_group_info(sb, block_group);
		if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
			percpu_counter_sub(&sbi->s_freeclusters_counter,
@@ -205,7 +204,7 @@ static void ext4_init_block_bitmap(struct super_block *sb,
					   count);
		}
		set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
		return;
		return -EIO;
	}
	memset(bh->b_data, 0, sb->s_blocksize);

@@ -243,6 +242,7 @@ static void ext4_init_block_bitmap(struct super_block *sb,
			     sb->s_blocksize * 8, bh->b_data);
	ext4_block_bitmap_csum_set(sb, block_group, gdp, bh);
	ext4_group_desc_csum_set(sb, block_group, gdp);
	return 0;
}

/* Return the number of free blocks in a block group.  It is used when
@@ -438,11 +438,15 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
	}
	ext4_lock_group(sb, block_group);
	if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
		ext4_init_block_bitmap(sb, bh, block_group, desc);
		int err;

		err = ext4_init_block_bitmap(sb, bh, block_group, desc);
		set_bitmap_uptodate(bh);
		set_buffer_uptodate(bh);
		ext4_unlock_group(sb, block_group);
		unlock_buffer(bh);
		if (err)
			ext4_error(sb, "Checksum bad for grp %u", block_group);
		return bh;
	}
	ext4_unlock_group(sb, block_group);
@@ -636,8 +640,7 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
	 * Account for the allocated meta blocks.  We will never
	 * fail EDQUOT for metdata, but we do account for it.
	 */
	if (!(*errp) &&
	    ext4_test_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED)) {
	if (!(*errp) && (flags & EXT4_MB_DELALLOC_RESERVED)) {
		spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
		spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
		dquot_alloc_block_nofail(inode,
+4 −8
Original line number Diff line number Diff line
@@ -24,8 +24,7 @@ int ext4_inode_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
	__u32 provided, calculated;
	struct ext4_sb_info *sbi = EXT4_SB(sb);

	if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
					EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
	if (!ext4_has_metadata_csum(sb))
		return 1;

	provided = le16_to_cpu(gdp->bg_inode_bitmap_csum_lo);
@@ -46,8 +45,7 @@ void ext4_inode_bitmap_csum_set(struct super_block *sb, ext4_group_t group,
	__u32 csum;
	struct ext4_sb_info *sbi = EXT4_SB(sb);

	if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
					EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
	if (!ext4_has_metadata_csum(sb))
		return;

	csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz);
@@ -65,8 +63,7 @@ int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	int sz = EXT4_CLUSTERS_PER_GROUP(sb) / 8;

	if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
					EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
	if (!ext4_has_metadata_csum(sb))
		return 1;

	provided = le16_to_cpu(gdp->bg_block_bitmap_csum_lo);
@@ -91,8 +88,7 @@ void ext4_block_bitmap_csum_set(struct super_block *sb, ext4_group_t group,
	__u32 csum;
	struct ext4_sb_info *sbi = EXT4_SB(sb);

	if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
			EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
	if (!ext4_has_metadata_csum(sb))
		return;

	csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz);
+3 −5
Original line number Diff line number Diff line
@@ -151,13 +151,11 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
					&file->f_ra, file,
					index, 1);
			file->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT;
			bh = ext4_bread(NULL, inode, map.m_lblk, 0, &err);
			bh = ext4_bread(NULL, inode, map.m_lblk, 0);
			if (IS_ERR(bh))
				return PTR_ERR(bh);
		}

		/*
		 * We ignore I/O errors on directories so users have a chance
		 * of recovering data when there's a bad sector
		 */
		if (!bh) {
			if (!dir_has_error) {
				EXT4_ERROR_FILE(file, 0,
+31 −19
Original line number Diff line number Diff line
@@ -572,15 +572,15 @@ enum {

/*
 * The bit position of these flags must not overlap with any of the
 * EXT4_GET_BLOCKS_*.  They are used by ext4_ext_find_extent(),
 * EXT4_GET_BLOCKS_*.  They are used by ext4_find_extent(),
 * read_extent_tree_block(), ext4_split_extent_at(),
 * ext4_ext_insert_extent(), and ext4_ext_create_new_leaf().
 * EXT4_EX_NOCACHE is used to indicate that the we shouldn't be
 * caching the extents when reading from the extent tree while a
 * truncate or punch hole operation is in progress.
 */
#define EXT4_EX_NOCACHE				0x0400
#define EXT4_EX_FORCE_CACHE			0x0800
#define EXT4_EX_NOCACHE				0x40000000
#define EXT4_EX_FORCE_CACHE			0x20000000

/*
 * Flags used by ext4_free_blocks
@@ -890,6 +890,7 @@ struct ext4_inode_info {
	struct ext4_es_tree i_es_tree;
	rwlock_t i_es_lock;
	struct list_head i_es_lru;
	unsigned int i_es_all_nr;	/* protected by i_es_lock */
	unsigned int i_es_lru_nr;	/* protected by i_es_lock */
	unsigned long i_touch_when;	/* jiffies of last accessing */

@@ -1174,6 +1175,9 @@ struct ext4_super_block {
#define EXT4_MF_MNTDIR_SAMPLED	0x0001
#define EXT4_MF_FS_ABORTED	0x0002	/* Fatal error detected */

/* Number of quota types we support */
#define EXT4_MAXQUOTAS 2

/*
 * fourth extended-fs super-block data in memory
 */
@@ -1237,7 +1241,7 @@ struct ext4_sb_info {
	u32 s_min_batch_time;
	struct block_device *journal_bdev;
#ifdef CONFIG_QUOTA
	char *s_qf_names[MAXQUOTAS];		/* Names of quota files with journalled quota */
	char *s_qf_names[EXT4_MAXQUOTAS];	/* Names of quota files with journalled quota */
	int s_jquota_fmt;			/* Format of quota to use */
#endif
	unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
@@ -1330,8 +1334,7 @@ struct ext4_sb_info {
	/* Reclaim extents from extent status tree */
	struct shrinker s_es_shrinker;
	struct list_head s_es_lru;
	unsigned long s_es_last_sorted;
	struct percpu_counter s_extent_cache_cnt;
	struct ext4_es_stats s_es_stats;
	struct mb_cache *s_mb_cache;
	spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp;

@@ -1399,7 +1402,6 @@ enum {
	EXT4_STATE_EXT_MIGRATE,		/* Inode is migrating */
	EXT4_STATE_DIO_UNWRITTEN,	/* need convert on dio done*/
	EXT4_STATE_NEWENTRY,		/* File just added to dir */
	EXT4_STATE_DELALLOC_RESERVED,	/* blks already reserved for delalloc */
	EXT4_STATE_DIOREAD_LOCK,	/* Disable support for dio read
					   nolocking */
	EXT4_STATE_MAY_INLINE_DATA,	/* may have in-inode data */
@@ -2086,10 +2088,8 @@ extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
extern int ext4_trim_fs(struct super_block *, struct fstrim_range *);

/* inode.c */
struct buffer_head *ext4_getblk(handle_t *, struct inode *,
						ext4_lblk_t, int, int *);
struct buffer_head *ext4_bread(handle_t *, struct inode *,
						ext4_lblk_t, int, int *);
struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int);
struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int);
int ext4_get_block_write(struct inode *inode, sector_t iblock,
			 struct buffer_head *bh_result, int create);
int ext4_get_block(struct inode *inode, sector_t iblock,
@@ -2109,6 +2109,7 @@ int do_journal_get_write_access(handle_t *handle,
#define CONVERT_INLINE_DATA	 2

extern struct inode *ext4_iget(struct super_block *, unsigned long);
extern struct inode *ext4_iget_normal(struct super_block *, unsigned long);
extern int  ext4_write_inode(struct inode *, struct writeback_control *);
extern int  ext4_setattr(struct dentry *, struct iattr *);
extern int  ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
@@ -2332,10 +2333,18 @@ extern int ext4_register_li_request(struct super_block *sb,
static inline int ext4_has_group_desc_csum(struct super_block *sb)
{
	return EXT4_HAS_RO_COMPAT_FEATURE(sb,
					  EXT4_FEATURE_RO_COMPAT_GDT_CSUM |
					  EXT4_FEATURE_RO_COMPAT_METADATA_CSUM);
					  EXT4_FEATURE_RO_COMPAT_GDT_CSUM) ||
	       (EXT4_SB(sb)->s_chksum_driver != NULL);
}

static inline int ext4_has_metadata_csum(struct super_block *sb)
{
	WARN_ON_ONCE(EXT4_HAS_RO_COMPAT_FEATURE(sb,
			EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
		     !EXT4_SB(sb)->s_chksum_driver);

	return (EXT4_SB(sb)->s_chksum_driver != NULL);
}
static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es)
{
	return ((ext4_fsblk_t)le32_to_cpu(es->s_blocks_count_hi) << 32) |
@@ -2731,10 +2740,10 @@ extern int ext4_can_extents_be_merged(struct inode *inode,
				      struct ext4_extent *ex1,
				      struct ext4_extent *ex2);
extern int ext4_ext_insert_extent(handle_t *, struct inode *,
				  struct ext4_ext_path *,
				  struct ext4_ext_path **,
				  struct ext4_extent *, int);
extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t,
						  struct ext4_ext_path *,
extern struct ext4_ext_path *ext4_find_extent(struct inode *, ext4_lblk_t,
					      struct ext4_ext_path **,
					      int flags);
extern void ext4_ext_drop_refs(struct ext4_ext_path *);
extern int ext4_ext_check_inode(struct inode *inode);
@@ -2742,10 +2751,15 @@ extern int ext4_find_delalloc_range(struct inode *inode,
				    ext4_lblk_t lblk_start,
				    ext4_lblk_t lblk_end);
extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk);
extern ext4_lblk_t ext4_ext_next_allocated_block(struct ext4_ext_path *path);
extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
			__u64 start, __u64 len);
extern int ext4_ext_precache(struct inode *inode);
extern int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len);
extern int ext4_swap_extents(handle_t *handle, struct inode *inode1,
				struct inode *inode2, ext4_lblk_t lblk1,
			     ext4_lblk_t lblk2,  ext4_lblk_t count,
			     int mark_unwritten,int *err);

/* move_extent.c */
extern void ext4_double_down_write_data_sem(struct inode *first,
@@ -2755,8 +2769,6 @@ extern void ext4_double_up_write_data_sem(struct inode *orig_inode,
extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
			     __u64 start_orig, __u64 start_donor,
			     __u64 len, __u64 *moved_len);
extern int mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
			    struct ext4_extent **extent);

/* page-io.c */
extern int __init ext4_init_pageio(void);
Loading