Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 80eabba7 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'for-4.10/fs-unmap' of git://git.kernel.dk/linux-block

Pull fs meta data unmap optimization from Jens Axboe:
 "A series from Jan Kara, providing a more efficient way for unmapping
  meta data from in the buffer cache than doing it block-by-block.

  Provide a general helper that existing callers can use"

* 'for-4.10/fs-unmap' of git://git.kernel.dk/linux-block:
  fs: Remove unmap_underlying_metadata
  fs: Add helper to clean bdev aliases under a bh and use it
  ext2: Use clean_bdev_aliases() instead of iteration
  ext4: Use clean_bdev_aliases() instead of iteration
  direct-io: Use clean_bdev_aliases() instead of handmade iteration
  fs: Provide function to unmap metadata for a range of blocks
parents 852d21ae ce98321b
Loading
Loading
Loading
Loading
+73 −31
Original line number Diff line number Diff line
@@ -43,6 +43,7 @@
#include <linux/bitops.h>
#include <linux/mpage.h>
#include <linux/bit_spinlock.h>
#include <linux/pagevec.h>
#include <trace/events/block.h>

static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
@@ -1604,37 +1605,80 @@ void create_empty_buffers(struct page *page,
}
EXPORT_SYMBOL(create_empty_buffers);

/*
 * We are taking a block for data and we don't want any output from any
 * buffer-cache aliases starting from return from that function and
 * until the moment when something will explicitly mark the buffer
 * dirty (hopefully that will not happen until we will free that block ;-)
 * We don't even need to mark it not-uptodate - nobody can expect
 * anything from a newly allocated buffer anyway. We used to used
 * unmap_buffer() for such invalidation, but that was wrong. We definitely
 * don't want to mark the alias unmapped, for example - it would confuse
 * anyone who might pick it with bread() afterwards...
 *
 * Also..  Note that bforget() doesn't lock the buffer.  So there can
 * be writeout I/O going on against recently-freed buffers.  We don't
 * wait on that I/O in bforget() - it's more efficient to wait on the I/O
 * only if we really need to.  That happens here.
 */
void unmap_underlying_metadata(struct block_device *bdev, sector_t block)
/**
 * clean_bdev_aliases: clean a range of buffers in block device
 * @bdev: Block device to clean buffers in
 * @block: Start of a range of blocks to clean
 * @len: Number of blocks to clean
 *
 * We are taking a range of blocks for data and we don't want writeback of any
 * buffer-cache aliases starting from return from this function and until the
 * moment when something will explicitly mark the buffer dirty (hopefully that
 * will not happen until we will free that block ;-) We don't even need to mark
 * it not-uptodate - nobody can expect anything from a newly allocated buffer
 * anyway. We used to use unmap_buffer() for such invalidation, but that was
 * wrong. We definitely don't want to mark the alias unmapped, for example - it
 * would confuse anyone who might pick it with bread() afterwards...
 *
 * Also..  Note that bforget() doesn't lock the buffer.  So there can be
 * writeout I/O going on against recently-freed buffers.  We don't wait on that
 * I/O in bforget() - it's more efficient to wait on the I/O only if we really
 * need to.  That happens here.
 */
void clean_bdev_aliases(struct block_device *bdev, sector_t block, sector_t len)
{
	struct buffer_head *old_bh;
	struct inode *bd_inode = bdev->bd_inode;
	struct address_space *bd_mapping = bd_inode->i_mapping;
	struct pagevec pvec;
	pgoff_t index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
	pgoff_t end;
	int i;
	struct buffer_head *bh;
	struct buffer_head *head;

	might_sleep();
	end = (block + len - 1) >> (PAGE_SHIFT - bd_inode->i_blkbits);
	pagevec_init(&pvec, 0);
	while (index <= end && pagevec_lookup(&pvec, bd_mapping, index,
			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];

	old_bh = __find_get_block_slow(bdev, block);
	if (old_bh) {
		clear_buffer_dirty(old_bh);
		wait_on_buffer(old_bh);
		clear_buffer_req(old_bh);
		__brelse(old_bh);
			index = page->index;
			if (index > end)
				break;
			if (!page_has_buffers(page))
				continue;
			/*
			 * We use page lock instead of bd_mapping->private_lock
			 * to pin buffers here since we can afford to sleep and
			 * it scales better than a global spinlock lock.
			 */
			lock_page(page);
			/* Recheck when the page is locked which pins bhs */
			if (!page_has_buffers(page))
				goto unlock_page;
			head = page_buffers(page);
			bh = head;
			do {
				if (!buffer_mapped(bh))
					goto next;
				if (bh->b_blocknr >= block + len)
					break;
				clear_buffer_dirty(bh);
				wait_on_buffer(bh);
				clear_buffer_req(bh);
next:
				bh = bh->b_this_page;
			} while (bh != head);
unlock_page:
			unlock_page(page);
		}
		pagevec_release(&pvec);
		cond_resched();
		index++;
	}
}
EXPORT_SYMBOL(unmap_underlying_metadata);
EXPORT_SYMBOL(clean_bdev_aliases);

/*
 * Size is a power-of-two in the range 512..PAGE_SIZE,
@@ -1745,8 +1789,7 @@ int __block_write_full_page(struct inode *inode, struct page *page,
			if (buffer_new(bh)) {
				/* blockdev mappings never come here */
				clear_buffer_new(bh);
				unmap_underlying_metadata(bh->b_bdev,
							bh->b_blocknr);
				clean_bdev_bh_alias(bh);
			}
		}
		bh = bh->b_this_page;
@@ -1992,8 +2035,7 @@ int __block_write_begin_int(struct page *page, loff_t pos, unsigned len,
			}

			if (buffer_new(bh)) {
				unmap_underlying_metadata(bh->b_bdev,
							bh->b_blocknr);
				clean_bdev_bh_alias(bh);
				if (PageUptodate(page)) {
					clear_buffer_new(bh);
					set_buffer_uptodate(bh);
@@ -2633,7 +2675,7 @@ int nobh_write_begin(struct address_space *mapping,
		if (!buffer_mapped(bh))
			is_mapped_to_disk = 0;
		if (buffer_new(bh))
			unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
			clean_bdev_bh_alias(bh);
		if (PageUptodate(page)) {
			set_buffer_uptodate(bh);
			continue;
+7 −21
Original line number Diff line number Diff line
@@ -842,24 +842,6 @@ submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page,
	return ret;
}

/*
 * Clean any dirty buffers in the blockdev mapping which alias newly-created
 * file blocks.  Only called for S_ISREG files - blockdevs do not set
 * buffer_new
 */
static void clean_blockdev_aliases(struct dio *dio, struct buffer_head *map_bh)
{
	unsigned i;
	unsigned nblocks;

	nblocks = map_bh->b_size >> dio->inode->i_blkbits;

	for (i = 0; i < nblocks; i++) {
		unmap_underlying_metadata(map_bh->b_bdev,
					  map_bh->b_blocknr + i);
	}
}

/*
 * If we are not writing the entire block and get_block() allocated
 * the block for us, we need to fill-in the unused portion of the
@@ -960,11 +942,15 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
					goto do_holes;

				sdio->blocks_available =
						map_bh->b_size >> sdio->blkbits;
						map_bh->b_size >> blkbits;
				sdio->next_block_for_io =
					map_bh->b_blocknr << sdio->blkfactor;
				if (buffer_new(map_bh))
					clean_blockdev_aliases(dio, map_bh);
				if (buffer_new(map_bh)) {
					clean_bdev_aliases(
						map_bh->b_bdev,
						map_bh->b_blocknr,
						map_bh->b_size >> blkbits);
				}

				if (!sdio->blkfactor)
					goto do_holes;
+3 −6
Original line number Diff line number Diff line
@@ -732,16 +732,13 @@ static int ext2_get_blocks(struct inode *inode,
	}

	if (IS_DAX(inode)) {
		int i;

		/*
		 * We must unmap blocks before zeroing so that writeback cannot
		 * overwrite zeros with stale data from block device page cache.
		 */
		for (i = 0; i < count; i++) {
			unmap_underlying_metadata(inode->i_sb->s_bdev,
					le32_to_cpu(chain[depth-1].key) + i);
		}
		clean_bdev_aliases(inode->i_sb->s_bdev,
				   le32_to_cpu(chain[depth-1].key),
				   count);
		/*
		 * block must be initialised before we put it in the tree
		 * so that it's not found by another thread before it's
+2 −11
Original line number Diff line number Diff line
@@ -3777,14 +3777,6 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
	return err;
}

static void unmap_underlying_metadata_blocks(struct block_device *bdev,
			sector_t block, int count)
{
	int i;
	for (i = 0; i < count; i++)
                unmap_underlying_metadata(bdev, block + i);
}

/*
 * Handle EOFBLOCKS_FL flag, clearing it if necessary
 */
@@ -4121,8 +4113,7 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
	 * new.
	 */
	if (allocated > map->m_len) {
		unmap_underlying_metadata_blocks(inode->i_sb->s_bdev,
					newblock + map->m_len,
		clean_bdev_aliases(inode->i_sb->s_bdev, newblock + map->m_len,
				   allocated - map->m_len);
		allocated = map->m_len;
	}
+5 −13
Original line number Diff line number Diff line
@@ -661,12 +661,8 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
		if (flags & EXT4_GET_BLOCKS_ZERO &&
		    map->m_flags & EXT4_MAP_MAPPED &&
		    map->m_flags & EXT4_MAP_NEW) {
			ext4_lblk_t i;

			for (i = 0; i < map->m_len; i++) {
				unmap_underlying_metadata(inode->i_sb->s_bdev,
							  map->m_pblk + i);
			}
			clean_bdev_aliases(inode->i_sb->s_bdev, map->m_pblk,
					   map->m_len);
			ret = ext4_issue_zeroout(inode, map->m_lblk,
						 map->m_pblk, map->m_len);
			if (ret) {
@@ -1137,8 +1133,7 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
			if (err)
				break;
			if (buffer_new(bh)) {
				unmap_underlying_metadata(bh->b_bdev,
							  bh->b_blocknr);
				clean_bdev_bh_alias(bh);
				if (PageUptodate(page)) {
					clear_buffer_new(bh);
					set_buffer_uptodate(bh);
@@ -2371,11 +2366,8 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)

	BUG_ON(map->m_len == 0);
	if (map->m_flags & EXT4_MAP_NEW) {
		struct block_device *bdev = inode->i_sb->s_bdev;
		int i;

		for (i = 0; i < map->m_len; i++)
			unmap_underlying_metadata(bdev, map->m_pblk + i);
		clean_bdev_aliases(inode->i_sb->s_bdev, map->m_pblk,
				   map->m_len);
	}
	return 0;
}
Loading