Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 4f579ae7 authored by Lukas Czerner's avatar Lukas Czerner Committed by Theodore Ts'o
Browse files

ext4: fix punch hole on files with indirect mapping



Currently punch hole code on files with direct/indirect mapping has some
problems which may lead to a data loss. For example (from Jan Kara):

fallocate -n -p 10240000 4096

will punch the range 10240000 - 12632064 instead of the range 1024000 -
10244096.

Also the code is a bit weird and it's not using infrastructure provided
by indirect.c, but rather creating it's own way.

This patch fixes the issues as well as making the operation to run 4
times faster from my testing (punching out 60GB file). It uses similar
approach used in ext4_ind_truncate() which takes advantage of
ext4_free_branches() function.

Also rename the ext4_free_hole_blocks() to something more sensible, like
the equivalent we have for extent mapped files. Call it
ext4_ind_remove_space().

This has been tested mostly with fsx and some xfstests which are testing
punch hole but does not require unwritten extents which are not
supported with direct/indirect mapping. Not problems showed up even with
1024k block size.

CC: stable@vger.kernel.org
Signed-off-by: default avatarLukas Czerner <lczerner@redhat.com>
Signed-off-by: default avatarTheodore Ts'o <tytso@mit.edu>
parent 71d4f7d0
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -2143,8 +2143,8 @@ extern ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock);
extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks);
extern void ext4_ind_truncate(handle_t *, struct inode *inode);
extern int ext4_free_hole_blocks(handle_t *handle, struct inode *inode,
				 ext4_lblk_t first, ext4_lblk_t stop);
extern int ext4_ind_remove_space(handle_t *handle, struct inode *inode,
				 ext4_lblk_t start, ext4_lblk_t end);

/* ioctl.c */
extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
+202 −79
Original line number Diff line number Diff line
@@ -1295,97 +1295,220 @@ void ext4_ind_truncate(handle_t *handle, struct inode *inode)
	}
}

static int free_hole_blocks(handle_t *handle, struct inode *inode,
			    struct buffer_head *parent_bh, __le32 *i_data,
			    int level, ext4_lblk_t first,
			    ext4_lblk_t count, int max)
/**
 *	ext4_ind_remove_space - remove space from the range
 *	@handle: JBD handle for this transaction
 *	@inode:	inode we are dealing with
 *	@start:	First block to remove
 *	@end:	One block after the last block to remove (exclusive)
 *
 *	Free the blocks in the defined range (end is exclusive endpoint of
 *	range). This is used by ext4_punch_hole().
 */
int ext4_ind_remove_space(handle_t *handle, struct inode *inode,
			  ext4_lblk_t start, ext4_lblk_t end)
{
	struct buffer_head *bh = NULL;
	struct ext4_inode_info *ei = EXT4_I(inode);
	__le32 *i_data = ei->i_data;
	int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
	int ret = 0;
	int i, inc;
	ext4_lblk_t offset;
	__le32 blk;

	inc = 1 << ((EXT4_BLOCK_SIZE_BITS(inode->i_sb) - 2) * level);
	for (i = 0, offset = 0; i < max; i++, i_data++, offset += inc) {
		if (offset >= count + first)
			break;
		if (*i_data == 0 || (offset + inc) <= first)
			continue;
		blk = *i_data;
		if (level > 0) {
			ext4_lblk_t first2;
			ext4_lblk_t count2;
	ext4_lblk_t offsets[4], offsets2[4];
	Indirect chain[4], chain2[4];
	Indirect *partial, *partial2;
	ext4_lblk_t max_block;
	__le32 nr = 0, nr2 = 0;
	int n = 0, n2 = 0;
	unsigned blocksize = inode->i_sb->s_blocksize;

			bh = sb_bread(inode->i_sb, le32_to_cpu(blk));
			if (!bh) {
				EXT4_ERROR_INODE_BLOCK(inode, le32_to_cpu(blk),
						       "Read failure");
				return -EIO;
	max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1)
					>> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
	if (end >= max_block)
		end = max_block;
	if ((start >= end) || (start > max_block))
		return 0;

	n = ext4_block_to_path(inode, start, offsets, NULL);
	n2 = ext4_block_to_path(inode, end, offsets2, NULL);

	BUG_ON(n > n2);

	if ((n == 1) && (n == n2)) {
		/* We're punching only within direct block range */
		ext4_free_data(handle, inode, NULL, i_data + offsets[0],
			       i_data + offsets2[0]);
		return 0;
	} else if (n2 > n) {
		/*
		 * Start and end are on a different levels so we're going to
		 * free partial block at start, and partial block at end of
		 * the range. If there are some levels in between then
		 * do_indirects label will take care of that.
		 */

		if (n == 1) {
			/*
			 * Start is at the direct block level, free
			 * everything to the end of the level.
			 */
			ext4_free_data(handle, inode, NULL, i_data + offsets[0],
				       i_data + EXT4_NDIR_BLOCKS);
			goto end_range;
		}
			if (first > offset) {
				first2 = first - offset;
				count2 = count;


		partial = ext4_find_shared(inode, n, offsets, chain, &nr);
		if (nr) {
			if (partial == chain) {
				/* Shared branch grows from the inode */
				ext4_free_branches(handle, inode, NULL,
					   &nr, &nr+1, (chain+n-1) - partial);
				*partial->p = 0;
			} else {
				first2 = 0;
				count2 = count - (offset - first);
				/* Shared branch grows from an indirect block */
				BUFFER_TRACE(partial->bh, "get_write_access");
				ext4_free_branches(handle, inode, partial->bh,
					partial->p,
					partial->p+1, (chain+n-1) - partial);
			}
			ret = free_hole_blocks(handle, inode, bh,
					       (__le32 *)bh->b_data, level - 1,
					       first2, count2,
					       inode->i_sb->s_blocksize >> 2);
			if (ret) {
				brelse(bh);
				goto err;
		}

		/*
		 * Clear the ends of indirect blocks on the shared branch
		 * at the start of the range
		 */
		while (partial > chain) {
			ext4_free_branches(handle, inode, partial->bh,
				partial->p + 1,
				(__le32 *)partial->bh->b_data+addr_per_block,
				(chain+n-1) - partial);
			BUFFER_TRACE(partial->bh, "call brelse");
			brelse(partial->bh);
			partial--;
		}
		if (level == 0 ||
		    (bh && all_zeroes((__le32 *)bh->b_data,
				      (__le32 *)bh->b_data + addr_per_block))) {
			ext4_free_data(handle, inode, parent_bh,
				       i_data, i_data + 1);

end_range:
		partial2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2);
		if (nr2) {
			if (partial2 == chain2) {
				/*
				 * Remember, end is exclusive so here we're at
				 * the start of the next level we're not going
				 * to free. Everything was covered by the start
				 * of the range.
				 */
				return 0;
			} else {
				/* Shared branch grows from an indirect block */
				partial2--;
			}
		brelse(bh);
		bh = NULL;
		} else {
			/*
			 * ext4_find_shared returns Indirect structure which
			 * points to the last element which should not be
			 * removed by truncate. But this is end of the range
			 * in punch_hole so we need to point to the next element
			 */
			partial2->p++;
		}

err:
	return ret;
		/*
		 * Clear the ends of indirect blocks on the shared branch
		 * at the end of the range
		 */
		while (partial2 > chain2) {
			ext4_free_branches(handle, inode, partial2->bh,
					   (__le32 *)partial2->bh->b_data,
					   partial2->p,
					   (chain2+n2-1) - partial2);
			BUFFER_TRACE(partial2->bh, "call brelse");
			brelse(partial2->bh);
			partial2--;
		}
		goto do_indirects;
	}

int ext4_free_hole_blocks(handle_t *handle, struct inode *inode,
			  ext4_lblk_t first, ext4_lblk_t stop)
{
	int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
	int level, ret = 0;
	int num = EXT4_NDIR_BLOCKS;
	ext4_lblk_t count, max = EXT4_NDIR_BLOCKS;
	__le32 *i_data = EXT4_I(inode)->i_data;

	count = stop - first;
	for (level = 0; level < 4; level++, max *= addr_per_block) {
		if (first < max) {
			ret = free_hole_blocks(handle, inode, NULL, i_data,
					       level, first, count, num);
			if (ret)
				goto err;
			if (count > max - first)
				count -= max - first;
			else
				break;
			first = 0;
		} else {
			first -= max;
	/* Punch happened within the same level (n == n2) */
	partial = ext4_find_shared(inode, n, offsets, chain, &nr);
	partial2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2);
	/*
	 * ext4_find_shared returns Indirect structure which
	 * points to the last element which should not be
	 * removed by truncate. But this is end of the range
	 * in punch_hole so we need to point to the next element
	 */
	partial2->p++;
	while ((partial > chain) || (partial2 > chain2)) {
		/* We're at the same block, so we're almost finished */
		if ((partial->bh && partial2->bh) &&
		    (partial->bh->b_blocknr == partial2->bh->b_blocknr)) {
			if ((partial > chain) && (partial2 > chain2)) {
				ext4_free_branches(handle, inode, partial->bh,
						   partial->p + 1,
						   partial2->p,
						   (chain+n-1) - partial);
				BUFFER_TRACE(partial->bh, "call brelse");
				brelse(partial->bh);
				BUFFER_TRACE(partial2->bh, "call brelse");
				brelse(partial2->bh);
			}
		i_data += num;
		if (level == 0) {
			num = 1;
			max = 1;
			return 0;
		}
		/*
		 * Clear the ends of indirect blocks on the shared branch
		 * at the start of the range
		 */
		if (partial > chain) {
			ext4_free_branches(handle, inode, partial->bh,
				   partial->p + 1,
				   (__le32 *)partial->bh->b_data+addr_per_block,
				   (chain+n-1) - partial);
			BUFFER_TRACE(partial->bh, "call brelse");
			brelse(partial->bh);
			partial--;
		}
		/*
		 * Clear the ends of indirect blocks on the shared branch
		 * at the end of the range
		 */
		if (partial2 > chain2) {
			ext4_free_branches(handle, inode, partial2->bh,
					   (__le32 *)partial2->bh->b_data,
					   partial2->p,
					   (chain2+n-1) - partial2);
			BUFFER_TRACE(partial2->bh, "call brelse");
			brelse(partial2->bh);
			partial2--;
		}

err:
	return ret;
	}

do_indirects:
	/* Kill the remaining (whole) subtrees */
	switch (offsets[0]) {
	default:
		if (++n >= n2)
			return 0;
		nr = i_data[EXT4_IND_BLOCK];
		if (nr) {
			ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1);
			i_data[EXT4_IND_BLOCK] = 0;
		}
	case EXT4_IND_BLOCK:
		if (++n >= n2)
			return 0;
		nr = i_data[EXT4_DIND_BLOCK];
		if (nr) {
			ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2);
			i_data[EXT4_DIND_BLOCK] = 0;
		}
	case EXT4_DIND_BLOCK:
		if (++n >= n2)
			return 0;
		nr = i_data[EXT4_TIND_BLOCK];
		if (nr) {
			ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3);
			i_data[EXT4_TIND_BLOCK] = 0;
		}
	case EXT4_TIND_BLOCK:
		;
	}
	return 0;
}
+1 −1
Original line number Diff line number Diff line
@@ -3506,7 +3506,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
		ret = ext4_ext_remove_space(inode, first_block,
					    stop_block - 1);
	else
		ret = ext4_free_hole_blocks(handle, inode, first_block,
		ret = ext4_ind_remove_space(handle, inode, first_block,
					    stop_block);

	up_write(&EXT4_I(inode)->i_data_sem);