Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 26a4c0c6 authored by Theodore Ts'o's avatar Theodore Ts'o
Browse files

ext4: refactor punch hole code



Move common code in ext4_ind_punch_hole() and ext4_ext_punch_hole()
into ext4_punch_hole().  This saves over 150 lines of code.

This also fixes a potential bug when the punch_hole() code is racing
against indirect-to-extents or extents-to-indirect migation.  We are
currently using i_mutex to protect against changes to the inode flag;
specifically, the append-only, immutable, and extents inode flags.  So
we need to take i_mutex before deciding whether to use the
extents-specific or indirect-specific punch_hole code.

Also, there was a missing call to ext4_inode_block_unlocked_dio() in
the indirect punch codepath.  This was added in commit 02d262df
to block DIO readers racing against the punch operation in the
codepath for extent-mapped inodes, but it was missing for
indirect-block mapped inodes.  One of the advantages of refactoring
the code is that it makes such oversights much less likely.

Signed-off-by: default avatar"Theodore Ts'o" <tytso@mit.edu>
parent 781f143e
Loading
Loading
Loading
Loading
+4 −3
Original line number Diff line number Diff line
@@ -2110,7 +2110,8 @@ extern ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock);
extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks, int chunk);
extern void ext4_ind_truncate(struct inode *inode);
extern int ext4_ind_punch_hole(struct file *file, loff_t offset, loff_t length);
extern int ext4_free_hole_blocks(handle_t *handle, struct inode *inode,
				 ext4_lblk_t first, ext4_lblk_t stop);

/* ioctl.c */
extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
@@ -2575,8 +2576,8 @@ extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks,
extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
			       struct ext4_map_blocks *map, int flags);
extern void ext4_ext_truncate(struct inode *);
extern int ext4_ext_punch_hole(struct file *file, loff_t offset,
				loff_t length);
extern int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
				 ext4_lblk_t end);
extern void ext4_ext_init(struct super_block *);
extern void ext4_ext_release(struct super_block *);
extern long ext4_fallocate(struct file *file, int mode, loff_t offset,
+2 −183
Original line number Diff line number Diff line
@@ -2599,7 +2599,7 @@ ext4_ext_more_to_rm(struct ext4_ext_path *path)
	return 1;
}

static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
			  ext4_lblk_t end)
{
	struct super_block *sb = inode->i_sb;
@@ -4623,187 +4623,6 @@ static int ext4_xattr_fiemap(struct inode *inode,
	return (error < 0 ? error : 0);
}

/*
 * ext4_ext_punch_hole
 *
 * Punches a hole of "length" bytes in a file starting
 * at byte "offset"
 *
 * @inode:  The inode of the file to punch a hole in
 * @offset: The starting byte offset of the hole
 * @length: The length of the hole
 *
 * Returns the number of blocks removed or negative on err
 */
int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
{
	struct inode *inode = file_inode(file);
	struct super_block *sb = inode->i_sb;
	ext4_lblk_t first_block, stop_block;
	struct address_space *mapping = inode->i_mapping;
	handle_t *handle;
	loff_t first_page, last_page, page_len;
	loff_t first_page_offset, last_page_offset;
	int credits, err = 0;

	/*
	 * Write out all dirty pages to avoid race conditions
	 * Then release them.
	 */
	if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
		err = filemap_write_and_wait_range(mapping,
			offset, offset + length - 1);

		if (err)
			return err;
	}

	mutex_lock(&inode->i_mutex);
	/* It's not possible punch hole on append only file */
	if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
		err = -EPERM;
		goto out_mutex;
	}
	if (IS_SWAPFILE(inode)) {
		err = -ETXTBSY;
		goto out_mutex;
	}

	/* No need to punch hole beyond i_size */
	if (offset >= inode->i_size)
		goto out_mutex;

	/*
	 * If the hole extends beyond i_size, set the hole
	 * to end after the page that contains i_size
	 */
	if (offset + length > inode->i_size) {
		length = inode->i_size +
		   PAGE_CACHE_SIZE - (inode->i_size & (PAGE_CACHE_SIZE - 1)) -
		   offset;
	}

	first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
	last_page = (offset + length) >> PAGE_CACHE_SHIFT;

	first_page_offset = first_page << PAGE_CACHE_SHIFT;
	last_page_offset = last_page << PAGE_CACHE_SHIFT;

	/* Now release the pages */
	if (last_page_offset > first_page_offset) {
		truncate_pagecache_range(inode, first_page_offset,
					 last_page_offset - 1);
	}

	/* Wait all existing dio workers, newcomers will block on i_mutex */
	ext4_inode_block_unlocked_dio(inode);
	err = ext4_flush_unwritten_io(inode);
	if (err)
		goto out_dio;
	inode_dio_wait(inode);

	credits = ext4_writepage_trans_blocks(inode);
	handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
	if (IS_ERR(handle)) {
		err = PTR_ERR(handle);
		goto out_dio;
	}


	/*
	 * Now we need to zero out the non-page-aligned data in the
	 * pages at the start and tail of the hole, and unmap the buffer
	 * heads for the block aligned regions of the page that were
	 * completely zeroed.
	 */
	if (first_page > last_page) {
		/*
		 * If the file space being truncated is contained within a page
		 * just zero out and unmap the middle of that page
		 */
		err = ext4_discard_partial_page_buffers(handle,
			mapping, offset, length, 0);

		if (err)
			goto out;
	} else {
		/*
		 * zero out and unmap the partial page that contains
		 * the start of the hole
		 */
		page_len  = first_page_offset - offset;
		if (page_len > 0) {
			err = ext4_discard_partial_page_buffers(handle, mapping,
						   offset, page_len, 0);
			if (err)
				goto out;
		}

		/*
		 * zero out and unmap the partial page that contains
		 * the end of the hole
		 */
		page_len = offset + length - last_page_offset;
		if (page_len > 0) {
			err = ext4_discard_partial_page_buffers(handle, mapping,
					last_page_offset, page_len, 0);
			if (err)
				goto out;
		}
	}

	/*
	 * If i_size is contained in the last page, we need to
	 * unmap and zero the partial page after i_size
	 */
	if (inode->i_size >> PAGE_CACHE_SHIFT == last_page &&
	   inode->i_size % PAGE_CACHE_SIZE != 0) {

		page_len = PAGE_CACHE_SIZE -
			(inode->i_size & (PAGE_CACHE_SIZE - 1));

		if (page_len > 0) {
			err = ext4_discard_partial_page_buffers(handle,
			  mapping, inode->i_size, page_len, 0);

			if (err)
				goto out;
		}
	}

	first_block = (offset + sb->s_blocksize - 1) >>
		EXT4_BLOCK_SIZE_BITS(sb);
	stop_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb);

	/* If there are no blocks to remove, return now */
	if (first_block >= stop_block)
		goto out;

	down_write(&EXT4_I(inode)->i_data_sem);
	ext4_discard_preallocations(inode);

	err = ext4_es_remove_extent(inode, first_block,
				    stop_block - first_block);
	err = ext4_ext_remove_space(inode, first_block, stop_block - 1);

	ext4_discard_preallocations(inode);

	if (IS_SYNC(inode))
		ext4_handle_sync(handle);

	up_write(&EXT4_I(inode)->i_data_sem);

out:
	inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
	ext4_mark_inode_dirty(handle, inode);
	ext4_journal_stop(handle);
out_dio:
	ext4_inode_resume_unlocked_dio(inode);
out_mutex:
	mutex_unlock(&inode->i_mutex);
	return err;
}

int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		__u64 start, __u64 len)
{
+2 −156
Original line number Diff line number Diff line
@@ -1434,7 +1434,7 @@ static int free_hole_blocks(handle_t *handle, struct inode *inode,
	return ret;
}

static int ext4_free_hole_blocks(handle_t *handle, struct inode *inode,
int ext4_free_hole_blocks(handle_t *handle, struct inode *inode,
			  ext4_lblk_t first, ext4_lblk_t stop)
{
	int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
@@ -1469,157 +1469,3 @@ static int ext4_free_hole_blocks(handle_t *handle, struct inode *inode,
	return ret;
}
int ext4_ind_punch_hole(struct file *file, loff_t offset, loff_t length)
{
	struct inode *inode = file_inode(file);
	struct super_block *sb = inode->i_sb;
	ext4_lblk_t first_block, stop_block;
	struct address_space *mapping = inode->i_mapping;
	handle_t *handle = NULL;
	loff_t first_page, last_page, page_len;
	loff_t first_page_offset, last_page_offset;
	int err = 0;

	/*
	 * Write out all dirty pages to avoid race conditions
	 * Then release them.
	 */
	if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
		err = filemap_write_and_wait_range(mapping,
			offset, offset + length - 1);
		if (err)
			return err;
	}

	mutex_lock(&inode->i_mutex);
	/* It's not possible punch hole on append only file */
	if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
		err = -EPERM;
		goto out_mutex;
	}
	if (IS_SWAPFILE(inode)) {
		err = -ETXTBSY;
		goto out_mutex;
	}

	/* No need to punch hole beyond i_size */
	if (offset >= inode->i_size)
		goto out_mutex;

	/*
	 * If the hole extents beyond i_size, set the hole
	 * to end after the page that contains i_size
	 */
	if (offset + length > inode->i_size) {
		length = inode->i_size +
		    PAGE_CACHE_SIZE - (inode->i_size & (PAGE_CACHE_SIZE - 1)) -
		    offset;
	}

	first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
	last_page = (offset + length) >> PAGE_CACHE_SHIFT;

	first_page_offset = first_page << PAGE_CACHE_SHIFT;
	last_page_offset = last_page << PAGE_CACHE_SHIFT;

	/* Now release the pages */
	if (last_page_offset > first_page_offset) {
		truncate_pagecache_range(inode, first_page_offset,
					 last_page_offset - 1);
	}

	/* Wait all existing dio works, newcomers will block on i_mutex */
	inode_dio_wait(inode);

	handle = start_transaction(inode);
	if (IS_ERR(handle))
		goto out_mutex;

	/*
	 * Now we need to zero out the non-page-aligned data in the
	 * pages at the start and tail of the hole, and unmap the buffer
	 * heads for the block aligned regions of the page that were
	 * completely zerod.
	 */
	if (first_page > last_page) {
		/*
		 * If the file space being truncated is contained within a page
		 * just zero out and unmap the middle of that page
		 */
		err = ext4_discard_partial_page_buffers(handle,
			mapping, offset, length, 0);
		if (err)
			goto out;
	} else {
		/*
		 * Zero out and unmap the paritial page that contains
		 * the start of the hole
		 */
		page_len = first_page_offset - offset;
		if (page_len > 0) {
			err = ext4_discard_partial_page_buffers(handle, mapping,
							offset, page_len, 0);
			if (err)
				goto out;
		}

		/*
		 * Zero out and unmap the partial page that contains
		 * the end of the hole
		 */
		page_len = offset + length - last_page_offset;
		if (page_len > 0) {
			err = ext4_discard_partial_page_buffers(handle, mapping,
						last_page_offset, page_len, 0);
			if (err)
				goto out;
		}
	}

	/*
	 * If i_size contained in the last page, we need to
	 * unmap and zero the paritial page after i_size
	 */
	if (inode->i_size >> PAGE_CACHE_SHIFT == last_page &&
	    inode->i_size % PAGE_CACHE_SIZE != 0) {
		page_len = PAGE_CACHE_SIZE -
			(inode->i_size & (PAGE_CACHE_SIZE - 1));
		if (page_len > 0) {
			err = ext4_discard_partial_page_buffers(handle,
				mapping, inode->i_size, page_len, 0);
			if (err)
				goto out;
		}
	}

	first_block = (offset + sb->s_blocksize - 1) >>
		EXT4_BLOCK_SIZE_BITS(sb);
	stop_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb);

	if (first_block >= stop_block)
		goto out;

	down_write(&EXT4_I(inode)->i_data_sem);
	ext4_discard_preallocations(inode);

	err = ext4_es_remove_extent(inode, first_block,
				    stop_block - first_block);
	err = ext4_free_hole_blocks(handle, inode, first_block, stop_block);

	ext4_discard_preallocations(inode);

	if (IS_SYNC(inode))
		ext4_handle_sync(handle);

	up_write(&EXT4_I(inode)->i_data_sem);

out:
	inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
	ext4_mark_inode_dirty(handle, inode);
	ext4_journal_stop(handle);

out_mutex:
	mutex_unlock(&inode->i_mutex);

	return err;
}
+175 −5
Original line number Diff line number Diff line
@@ -3566,20 +3566,190 @@ int ext4_can_truncate(struct inode *inode)
int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
{
	struct inode *inode = file_inode(file);
	struct super_block *sb = inode->i_sb;
	ext4_lblk_t first_block, stop_block;
	struct address_space *mapping = inode->i_mapping;
	loff_t first_page, last_page, page_len;
	loff_t first_page_offset, last_page_offset;
	handle_t *handle;
	unsigned int credits;
	int ret = 0;

	if (!S_ISREG(inode->i_mode))
		return -EOPNOTSUPP;

	if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
		return ext4_ind_punch_hole(file, offset, length);

	if (EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) {
	if (EXT4_SB(sb)->s_cluster_ratio > 1) {
		/* TODO: Add support for bigalloc file systems */
		return -EOPNOTSUPP;
	}

	trace_ext4_punch_hole(inode, offset, length);

	return ext4_ext_punch_hole(file, offset, length);
	/*
	 * Write out all dirty pages to avoid race conditions
	 * Then release them.
	 */
	if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
		ret = filemap_write_and_wait_range(mapping, offset,
						   offset + length - 1);
		if (ret)
			return ret;
	}

	mutex_lock(&inode->i_mutex);
	/* It's not possible punch hole on append only file */
	if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
		ret = -EPERM;
		goto out_mutex;
	}
	if (IS_SWAPFILE(inode)) {
		ret = -ETXTBSY;
		goto out_mutex;
	}

	/* No need to punch hole beyond i_size */
	if (offset >= inode->i_size)
		goto out_mutex;

	/*
	 * If the hole extends beyond i_size, set the hole
	 * to end after the page that contains i_size
	 */
	if (offset + length > inode->i_size) {
		length = inode->i_size +
		   PAGE_CACHE_SIZE - (inode->i_size & (PAGE_CACHE_SIZE - 1)) -
		   offset;
	}

	first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
	last_page = (offset + length) >> PAGE_CACHE_SHIFT;

	first_page_offset = first_page << PAGE_CACHE_SHIFT;
	last_page_offset = last_page << PAGE_CACHE_SHIFT;

	/* Now release the pages */
	if (last_page_offset > first_page_offset) {
		truncate_pagecache_range(inode, first_page_offset,
					 last_page_offset - 1);
	}

	/* Wait all existing dio workers, newcomers will block on i_mutex */
	ext4_inode_block_unlocked_dio(inode);
	ret = ext4_flush_unwritten_io(inode);
	if (ret)
		goto out_dio;
	inode_dio_wait(inode);

	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
		credits = ext4_writepage_trans_blocks(inode);
	else
		credits = ext4_blocks_for_truncate(inode);
	handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		ext4_std_error(sb, ret);
		goto out_dio;
	}

	/*
	 * Now we need to zero out the non-page-aligned data in the
	 * pages at the start and tail of the hole, and unmap the
	 * buffer heads for the block aligned regions of the page that
	 * were completely zeroed.
	 */
	if (first_page > last_page) {
		/*
		 * If the file space being truncated is contained
		 * within a page just zero out and unmap the middle of
		 * that page
		 */
		ret = ext4_discard_partial_page_buffers(handle,
			mapping, offset, length, 0);

		if (ret)
			goto out_stop;
	} else {
		/*
		 * zero out and unmap the partial page that contains
		 * the start of the hole
		 */
		page_len = first_page_offset - offset;
		if (page_len > 0) {
			ret = ext4_discard_partial_page_buffers(handle, mapping,
						offset, page_len, 0);
			if (ret)
				goto out_stop;
		}

		/*
		 * zero out and unmap the partial page that contains
		 * the end of the hole
		 */
		page_len = offset + length - last_page_offset;
		if (page_len > 0) {
			ret = ext4_discard_partial_page_buffers(handle, mapping,
					last_page_offset, page_len, 0);
			if (ret)
				goto out_stop;
		}
	}

	/*
	 * If i_size is contained in the last page, we need to
	 * unmap and zero the partial page after i_size
	 */
	if (inode->i_size >> PAGE_CACHE_SHIFT == last_page &&
	   inode->i_size % PAGE_CACHE_SIZE != 0) {
		page_len = PAGE_CACHE_SIZE -
			(inode->i_size & (PAGE_CACHE_SIZE - 1));

		if (page_len > 0) {
			ret = ext4_discard_partial_page_buffers(handle,
					mapping, inode->i_size, page_len, 0);

			if (ret)
				goto out_stop;
		}
	}

	first_block = (offset + sb->s_blocksize - 1) >>
		EXT4_BLOCK_SIZE_BITS(sb);
	stop_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb);

	/* If there are no blocks to remove, return now */
	if (first_block >= stop_block)
		goto out_stop;

	down_write(&EXT4_I(inode)->i_data_sem);
	ext4_discard_preallocations(inode);

	ret = ext4_es_remove_extent(inode, first_block,
				    stop_block - first_block);
	if (ret) {
		up_write(&EXT4_I(inode)->i_data_sem);
		goto out_stop;
	}

	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
		ret = ext4_ext_remove_space(inode, first_block,
					    stop_block - 1);
	else
		ret = ext4_free_hole_blocks(handle, inode, first_block,
					    stop_block);

	ext4_discard_preallocations(inode);
	if (IS_SYNC(inode))
		ext4_handle_sync(handle);
	up_write(&EXT4_I(inode)->i_data_sem);
	inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
	ext4_mark_inode_dirty(handle, inode);
out_stop:
	ext4_journal_stop(handle);
out_dio:
	ext4_inode_resume_unlocked_dio(inode);
out_mutex:
	mutex_unlock(&inode->i_mutex);
	return ret;
}

/*