Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 2d90c160 authored by Jan Kara's avatar Jan Kara Committed by Theodore Ts'o
Browse files

ext4: more efficient SEEK_DATA implementation



Using SEEK_DATA in a huge sparse file can easily lead to softlockups as
ext4_seek_data() iterates over a hole block-by-block. Fix the problem by
using the hole size returned from ext4_map_blocks() and thus skipping the
hole in one go.

Also update the SEEK_HOLE implementation to follow the same pattern as
SEEK_DATA to make future maintenance easier.

Furthermore, we add cond_resched() to both ext4_seek_data() and
ext4_seek_hole() to avoid softlockups in case an evil user creates a huge
fragmented file and we have to go through lots of extents.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
parent e3fb8eb1
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -2546,6 +2546,9 @@ extern void ext4_da_update_reserve_space(struct inode *inode,
					int used, int quota_claim);
extern int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk,
			      ext4_fsblk_t pblk, ext4_lblk_t len);
extern int ext4_get_next_extent(struct inode *inode, ext4_lblk_t lblk,
				unsigned int map_len,
				struct extent_status *result);

/* indirect.c */
extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
+36 −61
Original line number Diff line number Diff line
@@ -426,7 +426,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
 */
static int ext4_find_unwritten_pgoff(struct inode *inode,
				     int whence,
				     struct ext4_map_blocks *map,
				     ext4_lblk_t end_blk,
				     loff_t *offset)
{
	struct pagevec pvec;
@@ -441,7 +441,7 @@ static int ext4_find_unwritten_pgoff(struct inode *inode,
	blkbits = inode->i_sb->s_blocksize_bits;
	startoff = *offset;
	lastoff = startoff;
	endoff = (loff_t)(map->m_lblk + map->m_len) << blkbits;
	endoff = (loff_t)end_blk << blkbits;

	index = startoff >> PAGE_CACHE_SHIFT;
	end = endoff >> PAGE_CACHE_SHIFT;
@@ -559,12 +559,11 @@ static int ext4_find_unwritten_pgoff(struct inode *inode,
static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
{
	struct inode *inode = file->f_mapping->host;
	struct ext4_map_blocks map;
	struct extent_status es;
	ext4_lblk_t start, last, end;
	loff_t dataoff, isize;
	int blkbits;
	int ret = 0;
	int ret;

	inode_lock(inode);

@@ -581,41 +580,32 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
	dataoff = offset;

	do {
		map.m_lblk = last;
		map.m_len = end - last + 1;
		ret = ext4_map_blocks(NULL, inode, &map, 0);
		if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) {
			if (last != start)
				dataoff = (loff_t)last << blkbits;
			break;
		ret = ext4_get_next_extent(inode, last, end - last + 1, &es);
		if (ret <= 0) {
			/* No extent found -> no data */
			if (ret == 0)
				ret = -ENXIO;
			inode_unlock(inode);
			return ret;
		}

		/*
		 * If there is a delay extent at this offset,
		 * it will be as a data.
		 */
		ext4_es_find_delayed_extent_range(inode, last, last, &es);
		if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
		last = es.es_lblk;
		if (last != start)
			dataoff = (loff_t)last << blkbits;
		if (!ext4_es_is_unwritten(&es))
			break;
		}

		/*
		 * If there is a unwritten extent at this offset,
		 * it will be as a data or a hole according to page
		 * cache that has data or not.
		 */
		if (map.m_flags & EXT4_MAP_UNWRITTEN) {
			int unwritten;
			unwritten = ext4_find_unwritten_pgoff(inode, SEEK_DATA,
							      &map, &dataoff);
			if (unwritten)
		if (ext4_find_unwritten_pgoff(inode, SEEK_DATA,
					      es.es_lblk + es.es_len, &dataoff))
			break;
		}

		last++;
		last += es.es_len;
		dataoff = (loff_t)last << blkbits;
		cond_resched();
	} while (last <= end);

	inode_unlock(inode);
@@ -632,12 +622,11 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
{
	struct inode *inode = file->f_mapping->host;
	struct ext4_map_blocks map;
	struct extent_status es;
	ext4_lblk_t start, last, end;
	loff_t holeoff, isize;
	int blkbits;
	int ret = 0;
	int ret;

	inode_lock(inode);

@@ -654,44 +643,30 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
	holeoff = offset;

	do {
		map.m_lblk = last;
		map.m_len = end - last + 1;
		ret = ext4_map_blocks(NULL, inode, &map, 0);
		if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) {
			last += ret;
			holeoff = (loff_t)last << blkbits;
			continue;
		ret = ext4_get_next_extent(inode, last, end - last + 1, &es);
		if (ret < 0) {
			inode_unlock(inode);
			return ret;
		}

		/*
		 * If there is a delay extent at this offset,
		 * we will skip this extent.
		 */
		ext4_es_find_delayed_extent_range(inode, last, last, &es);
		if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
			last = es.es_lblk + es.es_len;
		/* Found a hole? */
		if (ret == 0 || es.es_lblk > last) {
			if (last != start)
				holeoff = (loff_t)last << blkbits;
			continue;
			break;
		}

		/*
		 * If there is a unwritten extent at this offset,
		 * it will be as a data or a hole according to page
		 * cache that has data or not.
		 */
		if (map.m_flags & EXT4_MAP_UNWRITTEN) {
			int unwritten;
			unwritten = ext4_find_unwritten_pgoff(inode, SEEK_HOLE,
							      &map, &holeoff);
			if (!unwritten) {
				last += ret;
				holeoff = (loff_t)last << blkbits;
				continue;
			}
		}

		/* find a hole */
		if (ext4_es_is_unwritten(&es) &&
		    ext4_find_unwritten_pgoff(inode, SEEK_HOLE,
					      last + es.es_len, &holeoff))
			break;

		last += es.es_len;
		holeoff = (loff_t)last << blkbits;
		cond_resched();
	} while (last <= end);

	inode_unlock(inode);
+67 −0
Original line number Diff line number Diff line
@@ -5596,3 +5596,70 @@ int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)

	return err;
}

/*
 * Look up the first non-hole extent of @inode located at or after logical
 * block @lblk, examining at most @map_len blocks.
 *
 * Both blocks reported as mapped by ext4_map_blocks() and delayed-allocation
 * extents found in the extent status tree count as "not a hole".
 *
 * Return: 1 when an extent was found, in which case it is described by
 * @result (start block, length, physical block and status); 0 when the
 * searched range contains no extent, in which case @result->es_len is set
 * to 0; a negative error code when ext4_map_blocks() fails.
 */
int ext4_get_next_extent(struct inode *inode, ext4_lblk_t lblk,
			 unsigned int map_len, struct extent_status *result)
{
	struct extent_status delayed = {};
	struct ext4_map_blocks map;
	ext4_lblk_t skip;
	int status;
	int nr;

	map.m_lblk = lblk;
	map.m_len = map_len;

	/*
	 * For non-extent based files this loop may iterate several times since
	 * we do not determine full hole size.
	 */
	while (map.m_len > 0) {
		nr = ext4_map_blocks(NULL, inode, &map, 0);
		if (nr < 0)
			return nr;

		if (nr == 0) {
			/* Nothing mapped at m_lblk; look for delalloc data. */
			ext4_es_find_delayed_extent_range(inode, map.m_lblk,
						map.m_lblk + map.m_len - 1,
						&delayed);

			if (!delayed.es_len ||
			    delayed.es_lblk >= map.m_lblk + map.m_len) {
				/* Pure hole: step past it and try again. */
				map.m_lblk += map.m_len;
				map_len -= map.m_len;
				map.m_len = map_len;
				cond_resched();
				continue;
			}

			/*
			 * Delalloc data comes before the next block in the
			 * extent tree. Trim whatever part of the delayed
			 * extent lies in front of @lblk before reporting it.
			 */
			skip = delayed.es_lblk < lblk ?
				lblk - delayed.es_lblk : 0;
			result->es_lblk = delayed.es_lblk + skip;
			result->es_len = delayed.es_len - skip;
			ext4_es_store_pblock(result,
					     ext4_es_pblock(&delayed) + skip);
			ext4_es_store_status(result, ext4_es_status(&delayed));
			return 1;
		}

		/* An extent covers m_lblk: hand it back as is. */
		status = (map.m_flags & EXT4_MAP_UNWRITTEN) ?
			EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
		result->es_lblk = map.m_lblk;
		result->es_len = map.m_len;
		ext4_es_store_pblock(result, map.m_pblk);
		ext4_es_store_status(result, status);
		return 1;
	}

	/* Ran out of range without meeting any extent. */
	result->es_len = 0;
	return 0;
}