Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 545052e9 authored by Christoph Hellwig's avatar Christoph Hellwig Committed by Theodore Ts'o
Browse files

ext4: Switch to iomap for SEEK_HOLE / SEEK_DATA



Switch to the iomap_seek_hole and iomap_seek_data helpers for
implementing lseek SEEK_HOLE / SEEK_DATA, and remove all the code that
isn't needed any more.

Note that with this patch ext4 will now always depend on the iomap code
instead of only when CONFIG_DAX is enabled, and it requires adding a
call into the extent status tree for iomap_begin as well to properly
deal with delalloc extents.

Signed-off-by: default avatarChristoph Hellwig <hch@lst.de>
Signed-off-by: default avatarAndreas Gruenbacher <agruenba@redhat.com>
Signed-off-by: default avatarTheodore Ts'o <tytso@mit.edu>
Reviewed-by: default avatarJan Kara <jack@suse.cz>
[More fixes and cleanups by Andreas]
parent 7046ae35
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -37,6 +37,7 @@ config EXT4_FS
	select CRC16
	select CRYPTO
	select CRYPTO_CRC32C
	select FS_IOMAP
	help
	  This is the next generation of the ext3 filesystem.

+0 −3
Original line number Diff line number Diff line
@@ -2515,9 +2515,6 @@ extern void ext4_da_update_reserve_space(struct inode *inode,
					int used, int quota_claim);
extern int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk,
			      ext4_fsblk_t pblk, ext4_lblk_t len);
extern int ext4_get_next_extent(struct inode *inode, ext4_lblk_t lblk,
				unsigned int map_len,
				struct extent_status *result);

/* indirect.c */
extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
+14 −249
Original line number Diff line number Diff line
@@ -20,6 +20,7 @@

#include <linux/time.h>
#include <linux/fs.h>
#include <linux/iomap.h>
#include <linux/mount.h>
#include <linux/path.h>
#include <linux/dax.h>
@@ -437,248 +438,6 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
	return dquot_file_open(inode, filp);
}

/*
 * Here we use ext4_map_blocks() to get a block mapping for a extent-based
 * file rather than ext4_ext_walk_space() because we can introduce
 * SEEK_DATA/SEEK_HOLE for block-mapped and extent-mapped file at the same
 * function.  When extent status tree has been fully implemented, it will
 * track all extent status for a file and we can directly use it to
 * retrieve the offset for SEEK_DATA/SEEK_HOLE.
 */

/*
 * When we retrieve the offset for SEEK_DATA/SEEK_HOLE, we would need to
 * lookup page cache to check whether or not there has some data between
 * [startoff, endoff] because, if this range contains an unwritten extent,
 * we determine this extent as a data or a hole according to whether the
 * page cache has data or not.
 */
static int ext4_find_unwritten_pgoff(struct inode *inode,
				     int whence,
				     ext4_lblk_t end_blk,
				     loff_t *offset)
{
	struct pagevec pvec;
	unsigned int blkbits;
	pgoff_t index;
	pgoff_t end;
	loff_t endoff;
	loff_t startoff;
	loff_t lastoff;
	int found = 0;

	blkbits = inode->i_sb->s_blocksize_bits;
	startoff = *offset;
	lastoff = startoff;
	endoff = (loff_t)end_blk << blkbits;

	index = startoff >> PAGE_SHIFT;
	end = (endoff - 1) >> PAGE_SHIFT;

	pagevec_init(&pvec, 0);
	do {
		int i;
		unsigned long nr_pages;

		nr_pages = pagevec_lookup_range(&pvec, inode->i_mapping,
					&index, end);
		if (nr_pages == 0)
			break;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];
			struct buffer_head *bh, *head;

			/*
			 * If current offset is smaller than the page offset,
			 * there is a hole at this offset.
			 */
			if (whence == SEEK_HOLE && lastoff < endoff &&
			    lastoff < page_offset(pvec.pages[i])) {
				found = 1;
				*offset = lastoff;
				goto out;
			}

			lock_page(page);

			if (unlikely(page->mapping != inode->i_mapping)) {
				unlock_page(page);
				continue;
			}

			if (!page_has_buffers(page)) {
				unlock_page(page);
				continue;
			}

			if (page_has_buffers(page)) {
				lastoff = page_offset(page);
				bh = head = page_buffers(page);
				do {
					if (lastoff + bh->b_size <= startoff)
						goto next;
					if (buffer_uptodate(bh) ||
					    buffer_unwritten(bh)) {
						if (whence == SEEK_DATA)
							found = 1;
					} else {
						if (whence == SEEK_HOLE)
							found = 1;
					}
					if (found) {
						*offset = max_t(loff_t,
							startoff, lastoff);
						unlock_page(page);
						goto out;
					}
next:
					lastoff += bh->b_size;
					bh = bh->b_this_page;
				} while (bh != head);
			}

			lastoff = page_offset(page) + PAGE_SIZE;
			unlock_page(page);
		}

		pagevec_release(&pvec);
	} while (index <= end);

	/* There are no pages upto endoff - that would be a hole in there. */
	if (whence == SEEK_HOLE && lastoff < endoff) {
		found = 1;
		*offset = lastoff;
	}
out:
	pagevec_release(&pvec);
	return found;
}

/*
 * ext4_seek_data() retrieves the offset for SEEK_DATA.
 */
static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
{
	struct inode *inode = file->f_mapping->host;
	struct extent_status es;
	ext4_lblk_t start, last, end;
	loff_t dataoff, isize;
	int blkbits;
	int ret;

	inode_lock(inode);

	isize = i_size_read(inode);
	if (offset < 0 || offset >= isize) {
		inode_unlock(inode);
		return -ENXIO;
	}

	blkbits = inode->i_sb->s_blocksize_bits;
	start = offset >> blkbits;
	last = start;
	end = isize >> blkbits;
	dataoff = offset;

	do {
		ret = ext4_get_next_extent(inode, last, end - last + 1, &es);
		if (ret <= 0) {
			/* No extent found -> no data */
			if (ret == 0)
				ret = -ENXIO;
			inode_unlock(inode);
			return ret;
		}

		last = es.es_lblk;
		if (last != start)
			dataoff = (loff_t)last << blkbits;
		if (!ext4_es_is_unwritten(&es))
			break;

		/*
		 * If there is a unwritten extent at this offset,
		 * it will be as a data or a hole according to page
		 * cache that has data or not.
		 */
		if (ext4_find_unwritten_pgoff(inode, SEEK_DATA,
					      es.es_lblk + es.es_len, &dataoff))
			break;
		last += es.es_len;
		dataoff = (loff_t)last << blkbits;
		cond_resched();
	} while (last <= end);

	inode_unlock(inode);

	if (dataoff > isize)
		return -ENXIO;

	return vfs_setpos(file, dataoff, maxsize);
}

/*
 * ext4_seek_hole() retrieves the offset for SEEK_HOLE.
 */
static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
{
	struct inode *inode = file->f_mapping->host;
	struct extent_status es;
	ext4_lblk_t start, last, end;
	loff_t holeoff, isize;
	int blkbits;
	int ret;

	inode_lock(inode);

	isize = i_size_read(inode);
	if (offset < 0 || offset >= isize) {
		inode_unlock(inode);
		return -ENXIO;
	}

	blkbits = inode->i_sb->s_blocksize_bits;
	start = offset >> blkbits;
	last = start;
	end = isize >> blkbits;
	holeoff = offset;

	do {
		ret = ext4_get_next_extent(inode, last, end - last + 1, &es);
		if (ret < 0) {
			inode_unlock(inode);
			return ret;
		}
		/* Found a hole? */
		if (ret == 0 || es.es_lblk > last) {
			if (last != start)
				holeoff = (loff_t)last << blkbits;
			break;
		}
		/*
		 * If there is a unwritten extent at this offset,
		 * it will be as a data or a hole according to page
		 * cache that has data or not.
		 */
		if (ext4_es_is_unwritten(&es) &&
		    ext4_find_unwritten_pgoff(inode, SEEK_HOLE,
					      last + es.es_len, &holeoff))
			break;

		last += es.es_len;
		holeoff = (loff_t)last << blkbits;
		cond_resched();
	} while (last <= end);

	inode_unlock(inode);

	if (holeoff > isize)
		holeoff = isize;

	return vfs_setpos(file, holeoff, maxsize);
}

/*
 * ext4_llseek() handles both block-mapped and extent-mapped maxbytes values
 * by calling generic_file_llseek_size() with the appropriate maxbytes
@@ -695,18 +454,24 @@ loff_t ext4_llseek(struct file *file, loff_t offset, int whence)
		maxbytes = inode->i_sb->s_maxbytes;

	switch (whence) {
	case SEEK_SET:
	case SEEK_CUR:
	case SEEK_END:
	default:
		return generic_file_llseek_size(file, offset, whence,
						maxbytes, i_size_read(inode));
	case SEEK_DATA:
		return ext4_seek_data(file, offset, maxbytes);
	case SEEK_HOLE:
		return ext4_seek_hole(file, offset, maxbytes);
		inode_lock_shared(inode);
		offset = iomap_seek_hole(inode, offset, &ext4_iomap_ops);
		inode_unlock_shared(inode);
		break;
	case SEEK_DATA:
		inode_lock_shared(inode);
		offset = iomap_seek_data(inode, offset, &ext4_iomap_ops);
		inode_unlock_shared(inode);
		break;
	}

	return -EINVAL;
	if (offset < 0)
		return offset;
	return vfs_setpos(file, offset, maxbytes);
}

const struct file_operations ext4_file_operations = {
+34 −75
Original line number Diff line number Diff line
@@ -3393,7 +3393,6 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
		return try_to_free_buffers(page);
}

#ifdef CONFIG_FS_DAX
static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
			    unsigned flags, struct iomap *iomap)
{
@@ -3402,6 +3401,7 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
	unsigned long first_block = offset >> blkbits;
	unsigned long last_block = (offset + length - 1) >> blkbits;
	struct ext4_map_blocks map;
	bool delalloc = false;
	int ret;


@@ -3422,9 +3422,33 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
	map.m_lblk = first_block;
	map.m_len = last_block - first_block + 1;

	if (!(flags & IOMAP_WRITE)) {
	if (flags & IOMAP_REPORT) {
		ret = ext4_map_blocks(NULL, inode, &map, 0);
		if (ret < 0)
			return ret;

		if (ret == 0) {
			ext4_lblk_t end = map.m_lblk + map.m_len - 1;
			struct extent_status es;

			ext4_es_find_delayed_extent_range(inode, map.m_lblk, end, &es);

			if (!es.es_len || es.es_lblk > end) {
				/* entire range is a hole */
			} else if (es.es_lblk > map.m_lblk) {
				/* range starts with a hole */
				map.m_len = es.es_lblk - map.m_lblk;
			} else {
				ext4_lblk_t offs = 0;

				if (es.es_lblk < map.m_lblk)
					offs = map.m_lblk - es.es_lblk;
				map.m_lblk = es.es_lblk + offs;
				map.m_len = es.es_len - offs;
				delalloc = true;
			}
		}
	} else if (flags & IOMAP_WRITE) {
		int dio_credits;
		handle_t *handle;
		int retries = 0;
@@ -3475,17 +3499,21 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
			}
		}
		ext4_journal_stop(handle);
	} else {
		ret = ext4_map_blocks(NULL, inode, &map, 0);
		if (ret < 0)
			return ret;
	}

	iomap->flags = 0;
	iomap->bdev = inode->i_sb->s_bdev;
	iomap->dax_dev = sbi->s_daxdev;
	iomap->offset = first_block << blkbits;
	iomap->length = (u64)map.m_len << blkbits;

	if (ret == 0) {
		iomap->type = IOMAP_HOLE;
		iomap->type = delalloc ? IOMAP_DELALLOC : IOMAP_HOLE;
		iomap->addr = IOMAP_NULL_ADDR;
		iomap->length = (u64)map.m_len << blkbits;
	} else {
		if (map.m_flags & EXT4_MAP_MAPPED) {
			iomap->type = IOMAP_MAPPED;
@@ -3496,11 +3524,11 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
			return -EIO;
		}
		iomap->addr = (u64)map.m_pblk << blkbits;
		iomap->length = (u64)map.m_len << blkbits;
	}

	if (map.m_flags & EXT4_MAP_NEW)
		iomap->flags |= IOMAP_F_NEW;

	return 0;
}

@@ -3561,8 +3589,6 @@ const struct iomap_ops ext4_iomap_ops = {
	.iomap_end		= ext4_iomap_end,
};

#endif

static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
			    ssize_t size, void *private)
{
@@ -6118,70 +6144,3 @@ int ext4_filemap_fault(struct vm_fault *vmf)

	return err;
}

/*
 * Find the first extent at or after @lblk in an inode that is not a hole.
 * Search for @map_len blocks at most. The extent is returned in @result.
 *
 * The function returns 1 if we found an extent. The function returns 0 in
 * case there is no extent at or after @lblk and in that case also sets
 * @result->es_len to 0. In case of error, the error code is returned.
 */
int ext4_get_next_extent(struct inode *inode, ext4_lblk_t lblk,
			 unsigned int map_len, struct extent_status *result)
{
	struct ext4_map_blocks map;
	struct extent_status es = {};
	int ret;

	map.m_lblk = lblk;
	map.m_len = map_len;

	/*
	 * For non-extent based files this loop may iterate several times since
	 * we do not determine full hole size.
	 */
	while (map.m_len > 0) {
		ret = ext4_map_blocks(NULL, inode, &map, 0);
		if (ret < 0)
			return ret;
		/* There's extent covering m_lblk? Just return it. */
		if (ret > 0) {
			int status;

			ext4_es_store_pblock(result, map.m_pblk);
			result->es_lblk = map.m_lblk;
			result->es_len = map.m_len;
			if (map.m_flags & EXT4_MAP_UNWRITTEN)
				status = EXTENT_STATUS_UNWRITTEN;
			else
				status = EXTENT_STATUS_WRITTEN;
			ext4_es_store_status(result, status);
			return 1;
		}
		ext4_es_find_delayed_extent_range(inode, map.m_lblk,
						  map.m_lblk + map.m_len - 1,
						  &es);
		/* Is delalloc data before next block in extent tree? */
		if (es.es_len && es.es_lblk < map.m_lblk + map.m_len) {
			ext4_lblk_t offset = 0;

			if (es.es_lblk < lblk)
				offset = lblk - es.es_lblk;
			result->es_lblk = es.es_lblk + offset;
			ext4_es_store_pblock(result,
					     ext4_es_pblock(&es) + offset);
			result->es_len = es.es_len - offset;
			ext4_es_store_status(result, ext4_es_status(&es));

			return 1;
		}
		/* There's a hole at m_lblk, advance us after it */
		map.m_lblk += map.m_len;
		map_len -= map.m_len;
		map.m_len = map_len;
		cond_resched();
	}
	result->es_len = 0;
	return 0;
}