Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 6432f212 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull ext4 updates from Ted Ts'o:
 "The big new feature added this time is supporting online resizing
  using the meta_bg feature.  This allows us to resize file systems
  which are greater than 16TB.  In addition, the speed of online
  resizing has been improved in general.

  We also fix a number of races, some of which could lead to deadlocks,
  in ext4's Asynchronous I/O and online defrag support, thanks to good
  work by Dmitry Monakhov.

  There are also a large number of more minor bug fixes and cleanups
  from a number of other ext4 contributors, quite a few of whom have
  submitted fixes for the first time."

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (69 commits)
  ext4: fix ext4_flush_completed_IO wait semantics
  ext4: fix mtime update in nodelalloc mode
  ext4: fix ext_remove_space for punch_hole case
  ext4: punch_hole should wait for DIO writers
  ext4: serialize truncate with owerwrite DIO workers
  ext4: endless truncate due to nonlocked dio readers
  ext4: serialize unlocked dio reads with truncate
  ext4: serialize dio nonlocked reads with defrag workers
  ext4: completed_io locking cleanup
  ext4: fix unwritten counter leakage
  ext4: give i_aiodio_unwritten a more appropriate name
  ext4: ext4_inode_info diet
  ext4: convert to use leXX_add_cpu()
  ext4: ext4_bread usage audit
  fs: reserve fallocate flag codepoint
  ext4: remove redundant offset check in mext_check_arguments()
  ext4: don't clear orphan list on ro mount with errors
  jbd2: fix assertion failure in commit code due to lacking transaction credits
  ext4: release donor reference when EXT4_IOC_MOVE_EXT ioctl fails
  ext4: enable FITRIM ioctl on bigalloc file system
  ...
parents 1b033447 c278531d
Loading
Loading
Loading
Loading
+13 −0
Original line number Original line Diff line number Diff line
@@ -96,3 +96,16 @@ Contact: "Theodore Ts'o" <tytso@mit.edu>
Description:
Description:
		The maximum number of megabytes the writeback code will
		The maximum number of megabytes the writeback code will
		try to write out before moving on to another inode.
		try to write out before moving on to another inode.

What:		/sys/fs/ext4/<disk>/extent_max_zeroout_kb
Date:		August 2012
Contact:	"Theodore Ts'o" <tytso@mit.edu>
Description:
		The maximum number of kilobytes which will be zeroed
		out in preference to creating a new uninitialized
		extent when manipulating an inode's extent tree.  Note
		that using a larger value will increase the
		variability of time necessary to complete a random
		write operation (since a 4k random write might turn
		into a much larger write due to the zeroout
		operation).
+10 −0
Original line number Original line Diff line number Diff line
@@ -375,6 +375,16 @@ dioread_nolock locking. If the dioread_nolock option is specified
			Because of the restrictions this option comprises
			Because of the restrictions this option comprises
			it is off by default (i.e. dioread_lock).
			it is off by default (i.e. dioread_lock).


max_dir_size_kb=n	This limits the size of directories so that any
			attempt to expand them beyond the specified
			limit in kilobytes will cause an ENOSPC error.
			This is useful in memory constrained
			environments, where a very large directory can
			cause severe performance problems or even
			provoke the Out Of Memory killer.  (For example,
			if there is only 512MB of memory available, a 176MB
			directory may seriously cramp the system's style.)

i_version		Enable 64-bit inode version support. This option is
i_version		Enable 64-bit inode version support. This option is
			off by default.
			off by default.


+7 −6
Original line number Original line Diff line number Diff line
@@ -2312,12 +2312,6 @@ int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
	loff_t size;
	loff_t size;
	int ret;
	int ret;


	/*
	 * Update file times before taking page lock. We may end up failing the
	 * fault so this update may be superfluous but who really cares...
	 */
	file_update_time(vma->vm_file);

	lock_page(page);
	lock_page(page);
	size = i_size_read(inode);
	size = i_size_read(inode);
	if ((page->mapping != inode->i_mapping) ||
	if ((page->mapping != inode->i_mapping) ||
@@ -2355,6 +2349,13 @@ int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
	struct super_block *sb = vma->vm_file->f_path.dentry->d_inode->i_sb;
	struct super_block *sb = vma->vm_file->f_path.dentry->d_inode->i_sb;


	sb_start_pagefault(sb);
	sb_start_pagefault(sb);

	/*
	 * Update file times before taking page lock. We may end up failing the
	 * fault so this update may be superfluous but who really cares...
	 */
	file_update_time(vma->vm_file);

	ret = __block_page_mkwrite(vma, vmf, get_block);
	ret = __block_page_mkwrite(vma, vmf, get_block);
	sb_end_pagefault(sb);
	sb_end_pagefault(sb);
	return block_page_mkwrite_return(ret);
	return block_page_mkwrite_return(ret);
+42 −7
Original line number Original line Diff line number Diff line
@@ -186,7 +186,6 @@ struct mpage_da_data {
#define EXT4_IO_END_ERROR	0x0002
#define EXT4_IO_END_ERROR	0x0002
#define EXT4_IO_END_QUEUED	0x0004
#define EXT4_IO_END_QUEUED	0x0004
#define EXT4_IO_END_DIRECT	0x0008
#define EXT4_IO_END_DIRECT	0x0008
#define EXT4_IO_END_IN_FSYNC	0x0010


struct ext4_io_page {
struct ext4_io_page {
	struct page	*p_page;
	struct page	*p_page;
@@ -912,9 +911,7 @@ struct ext4_inode_info {
	struct list_head i_completed_io_list;
	struct list_head i_completed_io_list;
	spinlock_t i_completed_io_lock;
	spinlock_t i_completed_io_lock;
	atomic_t i_ioend_count;	/* Number of outstanding io_end structs */
	atomic_t i_ioend_count;	/* Number of outstanding io_end structs */
	/* current io_end structure for async DIO write*/
	atomic_t i_unwritten; /* Nr. of inflight conversions pending */
	ext4_io_end_t *cur_aio_dio;
	atomic_t i_aiodio_unwritten; /* Nr. of inflight conversions pending */


	spinlock_t i_block_reservation_lock;
	spinlock_t i_block_reservation_lock;


@@ -1233,6 +1230,7 @@ struct ext4_sb_info {
	spinlock_t s_md_lock;
	spinlock_t s_md_lock;
	unsigned short *s_mb_offsets;
	unsigned short *s_mb_offsets;
	unsigned int *s_mb_maxs;
	unsigned int *s_mb_maxs;
	unsigned int s_group_info_size;


	/* tunables */
	/* tunables */
	unsigned long s_stripe;
	unsigned long s_stripe;
@@ -1243,6 +1241,7 @@ struct ext4_sb_info {
	unsigned int s_mb_order2_reqs;
	unsigned int s_mb_order2_reqs;
	unsigned int s_mb_group_prealloc;
	unsigned int s_mb_group_prealloc;
	unsigned int s_max_writeback_mb_bump;
	unsigned int s_max_writeback_mb_bump;
	unsigned int s_max_dir_size_kb;
	/* where last allocation was done - for stream allocation */
	/* where last allocation was done - for stream allocation */
	unsigned long s_mb_last_group;
	unsigned long s_mb_last_group;
	unsigned long s_mb_last_start;
	unsigned long s_mb_last_start;
@@ -1270,8 +1269,12 @@ struct ext4_sb_info {
	unsigned long s_sectors_written_start;
	unsigned long s_sectors_written_start;
	u64 s_kbytes_written;
	u64 s_kbytes_written;


	/* the size of zero-out chunk */
	unsigned int s_extent_max_zeroout_kb;

	unsigned int s_log_groups_per_flex;
	unsigned int s_log_groups_per_flex;
	struct flex_groups *s_flex_groups;
	struct flex_groups *s_flex_groups;
	ext4_group_t s_flex_groups_allocated;


	/* workqueue for dio unwritten */
	/* workqueue for dio unwritten */
	struct workqueue_struct *dio_unwritten_wq;
	struct workqueue_struct *dio_unwritten_wq;
@@ -1328,8 +1331,18 @@ static inline void ext4_set_io_unwritten_flag(struct inode *inode,
{
{
	if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
	if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
		io_end->flag |= EXT4_IO_END_UNWRITTEN;
		io_end->flag |= EXT4_IO_END_UNWRITTEN;
		atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
		atomic_inc(&EXT4_I(inode)->i_unwritten);
	}
}
}

/*
 * Return the io_end structure currently stored in the inode's i_private
 * field, i.e. whatever was last stored there by ext4_inode_aio_set().
 * Callers in this file test the result against NULL before using it.
 */
static inline ext4_io_end_t *ext4_inode_aio(struct inode *inode)
{
	return inode->i_private;
}

/*
 * Store @io in the inode's i_private field so that it can later be
 * retrieved with ext4_inode_aio().
 * NOTE(review): no locking is done here; presumably the caller serializes
 * access to i_private -- confirm against the call sites.
 */
static inline void ext4_inode_aio_set(struct inode *inode, ext4_io_end_t *io)
{
	inode->i_private = io;
}
}


/*
/*
@@ -1345,6 +1358,8 @@ enum {
	EXT4_STATE_DIO_UNWRITTEN,	/* need convert on dio done*/
	EXT4_STATE_DIO_UNWRITTEN,	/* need convert on dio done*/
	EXT4_STATE_NEWENTRY,		/* File just added to dir */
	EXT4_STATE_NEWENTRY,		/* File just added to dir */
	EXT4_STATE_DELALLOC_RESERVED,	/* blks already reserved for delalloc */
	EXT4_STATE_DELALLOC_RESERVED,	/* blks already reserved for delalloc */
	EXT4_STATE_DIOREAD_LOCK,	/* Disable support for dio read
					   nolocking */
};
};


#define EXT4_INODE_BIT_FNS(name, field, offset)				\
#define EXT4_INODE_BIT_FNS(name, field, offset)				\
@@ -1932,7 +1947,7 @@ extern void ext4_htree_free_dir_info(struct dir_private_info *p);


/* fsync.c */
/* fsync.c */
extern int ext4_sync_file(struct file *, loff_t, loff_t, int);
extern int ext4_sync_file(struct file *, loff_t, loff_t, int);
extern int ext4_flush_completed_IO(struct inode *);
extern int ext4_flush_unwritten_io(struct inode *);


/* hash.c */
/* hash.c */
extern int ext4fs_dirhash(const char *name, int len, struct
extern int ext4fs_dirhash(const char *name, int len, struct
@@ -1966,6 +1981,8 @@ extern void ext4_exit_mballoc(void);
extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
			     struct buffer_head *bh, ext4_fsblk_t block,
			     struct buffer_head *bh, ext4_fsblk_t block,
			     unsigned long count, int flags);
			     unsigned long count, int flags);
extern int ext4_mb_alloc_groupinfo(struct super_block *sb,
				   ext4_group_t ngroups);
extern int ext4_mb_add_groupinfo(struct super_block *sb,
extern int ext4_mb_add_groupinfo(struct super_block *sb,
		ext4_group_t i, struct ext4_group_desc *desc);
		ext4_group_t i, struct ext4_group_desc *desc);
extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
@@ -2051,6 +2068,8 @@ extern void ext4_superblock_csum_set(struct super_block *sb,
extern void *ext4_kvmalloc(size_t size, gfp_t flags);
extern void *ext4_kvmalloc(size_t size, gfp_t flags);
extern void *ext4_kvzalloc(size_t size, gfp_t flags);
extern void *ext4_kvzalloc(size_t size, gfp_t flags);
extern void ext4_kvfree(void *ptr);
extern void ext4_kvfree(void *ptr);
extern int ext4_alloc_flex_bg_array(struct super_block *sb,
				    ext4_group_t ngroup);
extern __printf(4, 5)
extern __printf(4, 5)
void __ext4_error(struct super_block *, const char *, unsigned int,
void __ext4_error(struct super_block *, const char *, unsigned int,
		  const char *, ...);
		  const char *, ...);
@@ -2352,6 +2371,7 @@ extern const struct file_operations ext4_dir_operations;
extern const struct inode_operations ext4_file_inode_operations;
extern const struct inode_operations ext4_file_inode_operations;
extern const struct file_operations ext4_file_operations;
extern const struct file_operations ext4_file_operations;
extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin);
extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin);
extern void ext4_unwritten_wait(struct inode *inode);


/* namei.c */
/* namei.c */
extern const struct inode_operations ext4_dir_inode_operations;
extern const struct inode_operations ext4_dir_inode_operations;
@@ -2400,11 +2420,11 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,


/* page-io.c */
/* page-io.c */
extern int __init ext4_init_pageio(void);
extern int __init ext4_init_pageio(void);
extern void ext4_add_complete_io(ext4_io_end_t *io_end);
extern void ext4_exit_pageio(void);
extern void ext4_exit_pageio(void);
extern void ext4_ioend_wait(struct inode *);
extern void ext4_ioend_wait(struct inode *);
extern void ext4_free_io_end(ext4_io_end_t *io);
extern void ext4_free_io_end(ext4_io_end_t *io);
extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags);
extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags);
extern int ext4_end_io_nolock(ext4_io_end_t *io);
extern void ext4_io_submit(struct ext4_io_submit *io);
extern void ext4_io_submit(struct ext4_io_submit *io);
extern int ext4_bio_write_page(struct ext4_io_submit *io,
extern int ext4_bio_write_page(struct ext4_io_submit *io,
			       struct page *page,
			       struct page *page,
@@ -2452,6 +2472,21 @@ static inline void set_bitmap_uptodate(struct buffer_head *bh)
	set_bit(BH_BITMAP_UPTODATE, &(bh)->b_state);
	set_bit(BH_BITMAP_UPTODATE, &(bh)->b_state);
}
}


/*
 * Disable DIO read nolock optimization, so new dioreaders will be forced
 * to grab i_mutex
 *
 * Sets EXT4_STATE_DIOREAD_LOCK in the inode state and then issues a full
 * memory barrier, so the flag is set before anything the caller does
 * afterwards.  Undone by ext4_inode_resume_unlocked_dio().
 * NOTE(review): presumably pairs with a barrier on the dio read side that
 * tests this state bit -- confirm against the reader.
 */
static inline void ext4_inode_block_unlocked_dio(struct inode *inode)
{
	ext4_set_inode_state(inode, EXT4_STATE_DIOREAD_LOCK);
	smp_mb();
}
/*
 * Re-enable the DIO read nolock optimization by clearing
 * EXT4_STATE_DIOREAD_LOCK.  The full memory barrier is issued first, so
 * everything the caller did beforehand is ordered before the flag is
 * cleared (mirror image of ext4_inode_block_unlocked_dio()).
 */
static inline void ext4_inode_resume_unlocked_dio(struct inode *inode)
{
	smp_mb();
	ext4_clear_inode_state(inode, EXT4_STATE_DIOREAD_LOCK);
}

#define in_range(b, first, len)	((b) >= (first) && (b) <= (first) + (len) - 1)
#define in_range(b, first, len)	((b) >= (first) && (b) <= (first) + (len) - 1)


/* For ioend & aio unwritten conversion wait queues */
/* For ioend & aio unwritten conversion wait queues */
+151 −107
Original line number Original line Diff line number Diff line
@@ -1177,7 +1177,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
		  le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block),
		  le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block),
		  ext4_idx_pblock(EXT_FIRST_INDEX(neh)));
		  ext4_idx_pblock(EXT_FIRST_INDEX(neh)));


	neh->eh_depth = cpu_to_le16(le16_to_cpu(neh->eh_depth) + 1);
	le16_add_cpu(&neh->eh_depth, 1);
	ext4_mark_inode_dirty(handle, inode);
	ext4_mark_inode_dirty(handle, inode);
out:
out:
	brelse(bh);
	brelse(bh);
@@ -1655,17 +1655,61 @@ static int ext4_ext_try_to_merge_right(struct inode *inode,
	return merge_done;
	return merge_done;
}
}


/*
 * This function does a very simple check to see if we can collapse
 * an extent tree with a single extent tree leaf block into the inode.
 *
 * @handle: journal handle; extended by 2 credits before any modification
 * @inode:  inode whose extent tree is examined
 * @path:   path into the extent tree; path[0] is the root held in the
 *          inode and path[1] the block below it
 *
 * Nothing is done unless the tree depth is exactly 1, the root holds
 * exactly one entry, and the entries of the block below fit into the root
 * (ext4_ext_space_root(inode, 0)).  Failure to extend the handle also
 * makes the function return silently; no error is reported to the caller.
 */
static void ext4_ext_try_to_merge_up(handle_t *handle,
				     struct inode *inode,
				     struct ext4_ext_path *path)
{
	size_t s;
	unsigned max_root = ext4_ext_space_root(inode, 0);
	ext4_fsblk_t blk;

	if ((path[0].p_depth != 1) ||
	    (le16_to_cpu(path[0].p_hdr->eh_entries) != 1) ||
	    (le16_to_cpu(path[1].p_hdr->eh_entries) > max_root))
		return;

	/*
	 * We need to modify the block allocation bitmap and the block
	 * group descriptor to release the extent tree block.  If we
	 * can't get the journal credits, give up.
	 */
	if (ext4_journal_extend(handle, 2))
		return;

	/*
	 * Copy the extent data up to the inode
	 */
	/* Remember the block being collapsed before the root is overwritten. */
	blk = ext4_idx_pblock(path[0].p_idx);
	/*
	 * NOTE(review): the entries in path[1] are leaf extents but are sized
	 * here with sizeof(struct ext4_extent_idx); presumably both structures
	 * have the same size -- confirm.
	 */
	s = le16_to_cpu(path[1].p_hdr->eh_entries) *
		sizeof(struct ext4_extent_idx);
	s += sizeof(struct ext4_extent_header);

	memcpy(path[0].p_hdr, path[1].p_hdr, s);
	path[0].p_depth = 0;
	/* Point p_ext at the same entry index, now inside the root. */
	path[0].p_ext = EXT_FIRST_EXTENT(path[0].p_hdr) +
		(path[1].p_ext - EXT_FIRST_EXTENT(path[1].p_hdr));
	/* The copied header carried the leaf block's capacity; fix it up. */
	path[0].p_hdr->eh_max = cpu_to_le16(max_root);

	/*
	 * NOTE(review): path[1].p_bh is released but not set to NULL here;
	 * confirm no later user of @path touches it.
	 */
	brelse(path[1].p_bh);
	ext4_free_blocks(handle, inode, NULL, blk, 1,
			 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
}

/*
/*
 * This function tries to merge the @ex extent to neighbours in the tree.
 * This function tries to merge the @ex extent to neighbours in the tree.
 * return 1 if merge left else 0.
 * return 1 if merge left else 0.
 */
 */
static int ext4_ext_try_to_merge(struct inode *inode,
static void ext4_ext_try_to_merge(handle_t *handle,
				  struct inode *inode,
				  struct ext4_ext_path *path,
				  struct ext4_ext_path *path,
				  struct ext4_extent *ex) {
				  struct ext4_extent *ex) {
	struct ext4_extent_header *eh;
	struct ext4_extent_header *eh;
	unsigned int depth;
	unsigned int depth;
	int merge_done = 0;
	int merge_done = 0;
	int ret = 0;


	depth = ext_depth(inode);
	depth = ext_depth(inode);
	BUG_ON(path[depth].p_hdr == NULL);
	BUG_ON(path[depth].p_hdr == NULL);
@@ -1675,9 +1719,9 @@ static int ext4_ext_try_to_merge(struct inode *inode,
		merge_done = ext4_ext_try_to_merge_right(inode, path, ex - 1);
		merge_done = ext4_ext_try_to_merge_right(inode, path, ex - 1);


	if (!merge_done)
	if (!merge_done)
		ret = ext4_ext_try_to_merge_right(inode, path, ex);
		(void) ext4_ext_try_to_merge_right(inode, path, ex);


	return ret;
	ext4_ext_try_to_merge_up(handle, inode, path);
}
}


/*
/*
@@ -1893,7 +1937,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
merge:
merge:
	/* try to merge extents */
	/* try to merge extents */
	if (!(flag & EXT4_GET_BLOCKS_PRE_IO))
	if (!(flag & EXT4_GET_BLOCKS_PRE_IO))
		ext4_ext_try_to_merge(inode, path, nearex);
		ext4_ext_try_to_merge(handle, inode, path, nearex);




	/* time to correct all indexes above */
	/* time to correct all indexes above */
@@ -1901,7 +1945,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
	if (err)
	if (err)
		goto cleanup;
		goto cleanup;


	err = ext4_ext_dirty(handle, inode, path + depth);
	err = ext4_ext_dirty(handle, inode, path + path->p_depth);


cleanup:
cleanup:
	if (npath) {
	if (npath) {
@@ -2092,13 +2136,10 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
}
}


/*
/*
 * ext4_ext_check_cache()
 * ext4_ext_in_cache()
 * Checks to see if the given block is in the cache.
 * Checks to see if the given block is in the cache.
 * If it is, the cached extent is stored in the given
 * If it is, the cached extent is stored in the given
 * cache extent pointer.  If the cached extent is a hole,
 * cache extent pointer.
 * this routine should be used instead of
 * ext4_ext_in_cache if the calling function needs to
 * know the size of the hole.
 *
 *
 * @inode: The files inode
 * @inode: The files inode
 * @block: The block to look for in the cache
 * @block: The block to look for in the cache
@@ -2107,8 +2148,10 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
 *
 *
 * Return 0 if cache is invalid; 1 if the cache is valid
 * Return 0 if cache is invalid; 1 if the cache is valid
 */
 */
static int ext4_ext_check_cache(struct inode *inode, ext4_lblk_t block,
static int
	struct ext4_ext_cache *ex){
ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
		  struct ext4_extent *ex)
{
	struct ext4_ext_cache *cex;
	struct ext4_ext_cache *cex;
	struct ext4_sb_info *sbi;
	struct ext4_sb_info *sbi;
	int ret = 0;
	int ret = 0;
@@ -2125,7 +2168,9 @@ static int ext4_ext_check_cache(struct inode *inode, ext4_lblk_t block,
		goto errout;
		goto errout;


	if (in_range(block, cex->ec_block, cex->ec_len)) {
	if (in_range(block, cex->ec_block, cex->ec_len)) {
		memcpy(ex, cex, sizeof(struct ext4_ext_cache));
		ex->ee_block = cpu_to_le32(cex->ec_block);
		ext4_ext_store_pblock(ex, cex->ec_start);
		ex->ee_len = cpu_to_le16(cex->ec_len);
		ext_debug("%u cached by %u:%u:%llu\n",
		ext_debug("%u cached by %u:%u:%llu\n",
				block,
				block,
				cex->ec_block, cex->ec_len, cex->ec_start);
				cex->ec_block, cex->ec_len, cex->ec_start);
@@ -2137,37 +2182,6 @@ static int ext4_ext_check_cache(struct inode *inode, ext4_lblk_t block,
	return ret;
	return ret;
}
}


/*
 * ext4_ext_in_cache()
 * Checks to see if the given block is in the cache.
 * If it is, the cached extent is stored in the given
 * extent pointer.
 *
 * @inode: The files inode
 * @block: The block to look for in the cache
 * @ex:    Pointer where the cached extent will be stored
 *         if it contains block
 *
 * Return 0 if cache is invalid; 1 if the cache is valid
 */
static int
ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
			struct ext4_extent *ex)
{
	struct ext4_ext_cache cex;
	int ret = 0;

	if (ext4_ext_check_cache(inode, block, &cex)) {
		ex->ee_block = cpu_to_le32(cex.ec_block);
		ext4_ext_store_pblock(ex, cex.ec_start);
		ex->ee_len = cpu_to_le16(cex.ec_len);
		ret = 1;
	}

	return ret;
}


/*
/*
 * ext4_ext_rm_idx:
 * ext4_ext_rm_idx:
 * removes index from the index block.
 * removes index from the index block.
@@ -2274,10 +2288,13 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	unsigned short ee_len =  ext4_ext_get_actual_len(ex);
	unsigned short ee_len =  ext4_ext_get_actual_len(ex);
	ext4_fsblk_t pblk;
	ext4_fsblk_t pblk;
	int flags = EXT4_FREE_BLOCKS_FORGET;
	int flags = 0;


	if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
	if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
		flags |= EXT4_FREE_BLOCKS_METADATA;
		flags |= EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET;
	else if (ext4_should_journal_data(inode))
		flags |= EXT4_FREE_BLOCKS_FORGET;

	/*
	/*
	 * For bigalloc file systems, we never free a partial cluster
	 * For bigalloc file systems, we never free a partial cluster
	 * at the beginning of the extent.  Instead, we make a note
	 * at the beginning of the extent.  Instead, we make a note
@@ -2572,7 +2589,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
	struct ext4_ext_path *path = NULL;
	struct ext4_ext_path *path = NULL;
	ext4_fsblk_t partial_cluster = 0;
	ext4_fsblk_t partial_cluster = 0;
	handle_t *handle;
	handle_t *handle;
	int i = 0, err;
	int i = 0, err = 0;


	ext_debug("truncate since %u to %u\n", start, end);
	ext_debug("truncate since %u to %u\n", start, end);


@@ -2604,12 +2621,16 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
			return PTR_ERR(path);
			return PTR_ERR(path);
		}
		}
		depth = ext_depth(inode);
		depth = ext_depth(inode);
		/* Leaf node may not exist only if inode has no blocks at all */
		ex = path[depth].p_ext;
		ex = path[depth].p_ext;
		if (!ex) {
		if (!ex) {
			ext4_ext_drop_refs(path);
			if (depth) {
			kfree(path);
				EXT4_ERROR_INODE(inode,
			path = NULL;
						 "path[%d].p_hdr == NULL",
			goto cont;
						 depth);
				err = -EIO;
			}
			goto out;
		}
		}


		ee_block = le32_to_cpu(ex->ee_block);
		ee_block = le32_to_cpu(ex->ee_block);
@@ -2641,8 +2662,6 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
				goto out;
				goto out;
		}
		}
	}
	}
cont:

	/*
	/*
	 * We start scanning from right side, freeing all the blocks
	 * We start scanning from right side, freeing all the blocks
	 * after i_size and walking into the tree depth-wise.
	 * after i_size and walking into the tree depth-wise.
@@ -2924,9 +2943,9 @@ static int ext4_split_extent_at(handle_t *handle,
			ext4_ext_mark_initialized(ex);
			ext4_ext_mark_initialized(ex);


		if (!(flags & EXT4_GET_BLOCKS_PRE_IO))
		if (!(flags & EXT4_GET_BLOCKS_PRE_IO))
			ext4_ext_try_to_merge(inode, path, ex);
			ext4_ext_try_to_merge(handle, inode, path, ex);


		err = ext4_ext_dirty(handle, inode, path + depth);
		err = ext4_ext_dirty(handle, inode, path + path->p_depth);
		goto out;
		goto out;
	}
	}


@@ -2958,8 +2977,8 @@ static int ext4_split_extent_at(handle_t *handle,
			goto fix_extent_len;
			goto fix_extent_len;
		/* update the extent length and mark as initialized */
		/* update the extent length and mark as initialized */
		ex->ee_len = cpu_to_le16(ee_len);
		ex->ee_len = cpu_to_le16(ee_len);
		ext4_ext_try_to_merge(inode, path, ex);
		ext4_ext_try_to_merge(handle, inode, path, ex);
		err = ext4_ext_dirty(handle, inode, path + depth);
		err = ext4_ext_dirty(handle, inode, path + path->p_depth);
		goto out;
		goto out;
	} else if (err)
	} else if (err)
		goto fix_extent_len;
		goto fix_extent_len;
@@ -3041,7 +3060,6 @@ static int ext4_split_extent(handle_t *handle,
	return err ? err : map->m_len;
	return err ? err : map->m_len;
}
}


#define EXT4_EXT_ZERO_LEN 7
/*
/*
 * This function is called by ext4_ext_map_blocks() if someone tries to write
 * This function is called by ext4_ext_map_blocks() if someone tries to write
 * to an uninitialized extent. It may result in splitting the uninitialized
 * to an uninitialized extent. It may result in splitting the uninitialized
@@ -3067,13 +3085,14 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
					   struct ext4_map_blocks *map,
					   struct ext4_map_blocks *map,
					   struct ext4_ext_path *path)
					   struct ext4_ext_path *path)
{
{
	struct ext4_sb_info *sbi;
	struct ext4_extent_header *eh;
	struct ext4_extent_header *eh;
	struct ext4_map_blocks split_map;
	struct ext4_map_blocks split_map;
	struct ext4_extent zero_ex;
	struct ext4_extent zero_ex;
	struct ext4_extent *ex;
	struct ext4_extent *ex;
	ext4_lblk_t ee_block, eof_block;
	ext4_lblk_t ee_block, eof_block;
	unsigned int ee_len, depth;
	unsigned int ee_len, depth;
	int allocated;
	int allocated, max_zeroout = 0;
	int err = 0;
	int err = 0;
	int split_flag = 0;
	int split_flag = 0;


@@ -3081,6 +3100,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
		"block %llu, max_blocks %u\n", inode->i_ino,
		"block %llu, max_blocks %u\n", inode->i_ino,
		(unsigned long long)map->m_lblk, map->m_len);
		(unsigned long long)map->m_lblk, map->m_len);


	sbi = EXT4_SB(inode->i_sb);
	eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
	eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
		inode->i_sb->s_blocksize_bits;
		inode->i_sb->s_blocksize_bits;
	if (eof_block < map->m_lblk + map->m_len)
	if (eof_block < map->m_lblk + map->m_len)
@@ -3180,9 +3200,12 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
	 */
	 */
	split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0;
	split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0;


	/* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */
	if (EXT4_EXT_MAY_ZEROOUT & split_flag)
	if (ee_len <= 2*EXT4_EXT_ZERO_LEN &&
		max_zeroout = sbi->s_extent_max_zeroout_kb >>
	    (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
			inode->i_sb->s_blocksize_bits;

	/*
	 * If the extent is no larger than s_extent_max_zeroout_kb (converted
	 * to blocks), zero it out directly
	 */
	if (max_zeroout && (ee_len <= max_zeroout)) {
		err = ext4_ext_zeroout(inode, ex);
		err = ext4_ext_zeroout(inode, ex);
		if (err)
		if (err)
			goto out;
			goto out;
@@ -3191,8 +3214,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
		if (err)
		if (err)
			goto out;
			goto out;
		ext4_ext_mark_initialized(ex);
		ext4_ext_mark_initialized(ex);
		ext4_ext_try_to_merge(inode, path, ex);
		ext4_ext_try_to_merge(handle, inode, path, ex);
		err = ext4_ext_dirty(handle, inode, path + depth);
		err = ext4_ext_dirty(handle, inode, path + path->p_depth);
		goto out;
		goto out;
	}
	}


@@ -3206,9 +3229,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
	split_map.m_lblk = map->m_lblk;
	split_map.m_lblk = map->m_lblk;
	split_map.m_len = map->m_len;
	split_map.m_len = map->m_len;


	if (allocated > map->m_len) {
	if (max_zeroout && (allocated > map->m_len)) {
		if (allocated <= EXT4_EXT_ZERO_LEN &&
		if (allocated <= max_zeroout) {
		    (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
			/* case 3 */
			/* case 3 */
			zero_ex.ee_block =
			zero_ex.ee_block =
					 cpu_to_le32(map->m_lblk);
					 cpu_to_le32(map->m_lblk);
@@ -3220,9 +3242,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
				goto out;
				goto out;
			split_map.m_lblk = map->m_lblk;
			split_map.m_lblk = map->m_lblk;
			split_map.m_len = allocated;
			split_map.m_len = allocated;
		} else if ((map->m_lblk - ee_block + map->m_len <
		} else if (map->m_lblk - ee_block + map->m_len < max_zeroout) {
			   EXT4_EXT_ZERO_LEN) &&
			   (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
			/* case 2 */
			/* case 2 */
			if (map->m_lblk != ee_block) {
			if (map->m_lblk != ee_block) {
				zero_ex.ee_block = ex->ee_block;
				zero_ex.ee_block = ex->ee_block;
@@ -3256,7 +3276,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 * to an uninitialized extent.
 * to an uninitialized extent.
 *
 *
 * Writing to an uninitialized extent may result in splitting the uninitialized
 * Writing to an uninitialized extent may result in splitting the uninitialized
 * extent into multiple /initialized uninitialized extents (up to three)
 * extent into multiple initialized/uninitialized extents (up to three)
 * There are three possibilities:
 * There are three possibilities:
 *   a> There is no split required: Entire extent should be uninitialized
 *   a> There is no split required: Entire extent should be uninitialized
 *   b> Splits in two extents: Write is happening at either end of the extent
 *   b> Splits in two extents: Write is happening at either end of the extent
@@ -3333,10 +3353,10 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
	/* note: ext4_ext_correct_indexes() isn't needed here because
	/* note: ext4_ext_correct_indexes() isn't needed here because
	 * borders are not changed
	 * borders are not changed
	 */
	 */
	ext4_ext_try_to_merge(inode, path, ex);
	ext4_ext_try_to_merge(handle, inode, path, ex);


	/* Mark modified extent as dirty */
	/* Mark modified extent as dirty */
	err = ext4_ext_dirty(handle, inode, path + depth);
	err = ext4_ext_dirty(handle, inode, path + path->p_depth);
out:
out:
	ext4_ext_show_leaf(inode, path);
	ext4_ext_show_leaf(inode, path);
	return err;
	return err;
@@ -3600,7 +3620,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
{
{
	int ret = 0;
	int ret = 0;
	int err = 0;
	int err = 0;
	ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
	ext4_io_end_t *io = ext4_inode_aio(inode);


	ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical "
	ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical "
		  "block %llu, max_blocks %u, flags %x, allocated %u\n",
		  "block %llu, max_blocks %u, flags %x, allocated %u\n",
@@ -3615,6 +3635,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
	if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
	if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
		ret = ext4_split_unwritten_extents(handle, inode, map,
		ret = ext4_split_unwritten_extents(handle, inode, map,
						   path, flags);
						   path, flags);
		if (ret <= 0)
			goto out;
		/*
		/*
		 * Flag the inode(non aio case) or end_io struct (aio case)
		 * Flag the inode(non aio case) or end_io struct (aio case)
		 * that this IO needs to conversion to written when IO is
		 * that this IO needs to conversion to written when IO is
@@ -3858,8 +3880,9 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
	unsigned int allocated = 0, offset = 0;
	unsigned int allocated = 0, offset = 0;
	unsigned int allocated_clusters = 0;
	unsigned int allocated_clusters = 0;
	struct ext4_allocation_request ar;
	struct ext4_allocation_request ar;
	ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
	ext4_io_end_t *io = ext4_inode_aio(inode);
	ext4_lblk_t cluster_offset;
	ext4_lblk_t cluster_offset;
	int set_unwritten = 0;


	ext_debug("blocks %u/%u requested for inode %lu\n",
	ext_debug("blocks %u/%u requested for inode %lu\n",
		  map->m_lblk, map->m_len, inode->i_ino);
		  map->m_lblk, map->m_len, inode->i_ino);
@@ -4082,13 +4105,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
		 * For non async direct IO case, flag the inode state
		 * For non async direct IO case, flag the inode state
		 * that we need to perform conversion when IO is done.
		 * that we need to perform conversion when IO is done.
		 */
		 */
		if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
		if ((flags & EXT4_GET_BLOCKS_PRE_IO))
			if (io)
			set_unwritten = 1;
				ext4_set_io_unwritten_flag(inode, io);
			else
				ext4_set_inode_state(inode,
						     EXT4_STATE_DIO_UNWRITTEN);
		}
		if (ext4_should_dioread_nolock(inode))
		if (ext4_should_dioread_nolock(inode))
			map->m_flags |= EXT4_MAP_UNINIT;
			map->m_flags |= EXT4_MAP_UNINIT;
	}
	}
@@ -4100,6 +4118,15 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
	if (!err)
	if (!err)
		err = ext4_ext_insert_extent(handle, inode, path,
		err = ext4_ext_insert_extent(handle, inode, path,
					     &newex, flags);
					     &newex, flags);

	if (!err && set_unwritten) {
		if (io)
			ext4_set_io_unwritten_flag(inode, io);
		else
			ext4_set_inode_state(inode,
					     EXT4_STATE_DIO_UNWRITTEN);
	}

	if (err && free_on_err) {
	if (err && free_on_err) {
		int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ?
		int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ?
			EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0;
			EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0;
@@ -4241,7 +4268,7 @@ void ext4_ext_truncate(struct inode *inode)
	 * finish any pending end_io work so we won't run the risk of
	 * finish any pending end_io work so we won't run the risk of
	 * converting any truncated blocks to initialized later
	 * converting any truncated blocks to initialized later
	 */
	 */
	ext4_flush_completed_IO(inode);
	ext4_flush_unwritten_io(inode);


	/*
	/*
	 * probably first extent we're gonna free will be last in block
	 * probably first extent we're gonna free will be last in block
@@ -4769,9 +4796,32 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
	loff_t first_page_offset, last_page_offset;
	loff_t first_page_offset, last_page_offset;
	int credits, err = 0;
	int credits, err = 0;


	/*
	 * Write out all dirty pages to avoid race conditions
	 * Then release them.
	 */
	if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
		err = filemap_write_and_wait_range(mapping,
			offset, offset + length - 1);

		if (err)
			return err;
	}

	mutex_lock(&inode->i_mutex);
	/* Cannot punch a hole in an append-only or immutable file */
	if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
		err = -EPERM;
		goto out_mutex;
	}
	if (IS_SWAPFILE(inode)) {
		err = -ETXTBSY;
		goto out_mutex;
	}

	/* No need to punch hole beyond i_size */
	/* No need to punch hole beyond i_size */
	if (offset >= inode->i_size)
	if (offset >= inode->i_size)
		return 0;
		goto out_mutex;


	/*
	/*
	 * If the hole extends beyond i_size, set the hole
	 * If the hole extends beyond i_size, set the hole
@@ -4789,35 +4839,26 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
	first_page_offset = first_page << PAGE_CACHE_SHIFT;
	first_page_offset = first_page << PAGE_CACHE_SHIFT;
	last_page_offset = last_page << PAGE_CACHE_SHIFT;
	last_page_offset = last_page << PAGE_CACHE_SHIFT;


	/*
	 * Write out all dirty pages to avoid race conditions
	 * Then release them.
	 */
	if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
		err = filemap_write_and_wait_range(mapping,
			offset, offset + length - 1);

		if (err)
			return err;
	}

	/* Now release the pages */
	/* Now release the pages */
	if (last_page_offset > first_page_offset) {
	if (last_page_offset > first_page_offset) {
		truncate_pagecache_range(inode, first_page_offset,
		truncate_pagecache_range(inode, first_page_offset,
					 last_page_offset - 1);
					 last_page_offset - 1);
	}
	}


	/* finish any pending end_io work */
	/* Wait for all existing dio workers, newcomers will block on i_mutex */
	ext4_flush_completed_IO(inode);
	ext4_inode_block_unlocked_dio(inode);
	err = ext4_flush_unwritten_io(inode);
	if (err)
		goto out_dio;
	inode_dio_wait(inode);


	credits = ext4_writepage_trans_blocks(inode);
	credits = ext4_writepage_trans_blocks(inode);
	handle = ext4_journal_start(inode, credits);
	handle = ext4_journal_start(inode, credits);
	if (IS_ERR(handle))
	if (IS_ERR(handle)) {
		return PTR_ERR(handle);
		err = PTR_ERR(handle);
		goto out_dio;
	}


	err = ext4_orphan_add(handle, inode);
	if (err)
		goto out;


	/*
	/*
	 * Now we need to zero out the non-page-aligned data in the
	 * Now we need to zero out the non-page-aligned data in the
@@ -4903,10 +4944,13 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
	up_write(&EXT4_I(inode)->i_data_sem);
	up_write(&EXT4_I(inode)->i_data_sem);


out:
out:
	ext4_orphan_del(handle, inode);
	inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
	inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
	ext4_mark_inode_dirty(handle, inode);
	ext4_mark_inode_dirty(handle, inode);
	ext4_journal_stop(handle);
	ext4_journal_stop(handle);
out_dio:
	ext4_inode_resume_unlocked_dio(inode);
out_mutex:
	mutex_unlock(&inode->i_mutex);
	return err;
	return err;
}
}
int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
Loading