Merge tag 'xfs-for-linus-4.8-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs (0e6acf02) · Commits · e / devices / android_kernel_fairphone_FP4

fs/Kconfig

+3 −0

Original line number	Diff line number	Diff line
		@@ -10,6 +10,9 @@ config DCACHE_WORD_ACCESS

		if BLOCK

		config FS_IOMAP
		bool

		source "fs/ext2/Kconfig"
		source "fs/ext4/Kconfig"
		source "fs/jbd2/Kconfig"

fs/Makefile

+1 −0

Original line number	Diff line number	Diff line
		@@ -49,6 +49,7 @@ obj-$(CONFIG_COREDUMP) += coredump.o
		obj-$(CONFIG_SYSCTL) += drop_caches.o

		obj-$(CONFIG_FHANDLE) += fhandle.o
		obj-$(CONFIG_FS_IOMAP) += iomap.o

		obj-y += quota/

fs/buffer.c

+71 −5

Original line number	Diff line number	Diff line
		@@ -21,6 +21,7 @@
		#include <linux/kernel.h>
		#include <linux/syscalls.h>
		#include <linux/fs.h>
		#include <linux/iomap.h>
		#include <linux/mm.h>
		#include <linux/percpu.h>
		#include <linux/slab.h>
		@@ -1892,8 +1893,62 @@ void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
		}
		EXPORT_SYMBOL(page_zero_new_buffers);

		int __block_write_begin(struct page *page, loff_t pos, unsigned len,
		get_block_t *get_block)
		/*
		 * Transfer the state of @iomap for file block @block onto buffer head @bh.
		 *
		 * @inode: inode owning the buffer; supplies i_blkbits and the current i_size
		 * @block: index of the file block that @bh covers
		 * @bh:    buffer head to update: b_bdev and state bits always, b_blocknr
		 *         only for mapped and unwritten extents
		 * @iomap: mapping describing the extent; it must extend past @block
		 */
		static void
		iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh,
				struct iomap *iomap)
		{
			/* Widen before shifting so a narrow sector_t cannot overflow. */
			loff_t offset = (loff_t)block << inode->i_blkbits;

			bh->b_bdev = iomap->bdev;

			/*
			 * Block points to offset in file we need to map, iomap contains
			 * the offset at which the map starts. A map that ends at or before
			 * the current block is a caller bug, so trap it here instead of
			 * deriving a block number from outside the extent.
			 */
			BUG_ON(offset >= iomap->offset + iomap->length);

			switch (iomap->type) {
			case IOMAP_HOLE:
				/*
				 * If the buffer is not up to date or beyond the current EOF,
				 * we need to mark it as new to ensure sub-block zeroing is
				 * executed if necessary.
				 */
				if (!buffer_uptodate(bh) ||
				    (offset >= i_size_read(inode)))
					set_buffer_new(bh);
				break;
			case IOMAP_DELALLOC:
				if (!buffer_uptodate(bh) ||
				    (offset >= i_size_read(inode)))
					set_buffer_new(bh);
				set_buffer_uptodate(bh);
				set_buffer_mapped(bh);
				set_buffer_delay(bh);
				break;
			case IOMAP_UNWRITTEN:
				/*
				 * For unwritten regions, we always need to ensure that
				 * sub-block writes cause the regions in the block we are not
				 * writing to are zeroed. Set the buffer as new to ensure this.
				 */
				set_buffer_new(bh);
				set_buffer_unwritten(bh);
				/* FALLTHRU */
			case IOMAP_MAPPED:
				if (offset >= i_size_read(inode))
					set_buffer_new(bh);
				/* blkno is in 512-byte sectors; convert to fs blocks. */
				bh->b_blocknr = (iomap->blkno >> (inode->i_blkbits - 9)) +
						((offset - iomap->offset) >> inode->i_blkbits);
				set_buffer_mapped(bh);
				break;
			}
		}

		int __block_write_begin_int(struct page *page, loff_t pos, unsigned len,
		get_block_t get_block, struct iomap iomap)
		{
		unsigned from = pos & (PAGE_SIZE - 1);
		unsigned to = from + len;
		@@ -1929,9 +1984,14 @@ int __block_write_begin(struct page *page, loff_t pos, unsigned len,
		clear_buffer_new(bh);
		if (!buffer_mapped(bh)) {
		WARN_ON(bh->b_size != blocksize);
		if (get_block) {
		err = get_block(inode, block, bh, 1);
		if (err)
		break;
		} else {
		iomap_to_bh(inode, block, bh, iomap);
		}

		if (buffer_new(bh)) {
		unmap_underlying_metadata(bh->b_bdev,
		bh->b_blocknr);
		@@ -1972,6 +2032,12 @@ int __block_write_begin(struct page *page, loff_t pos, unsigned len,
		page_zero_new_buffers(page, from, to);
		return err;
		}

		/*
		 * get_block-based entry point: forwards to __block_write_begin_int()
		 * without an iomap, so block lookups go through @get_block.
		 */
		int __block_write_begin(struct page *page, loff_t pos, unsigned len,
				get_block_t *get_block)
		{
			struct iomap *no_iomap = NULL;

			return __block_write_begin_int(page, pos, len, get_block, no_iomap);
		}
		EXPORT_SYMBOL(__block_write_begin);

		static int __block_commit_write(struct inode inode, struct page page,

fs/internal.h

+3 −0

Original line number	Diff line number	Diff line
		@@ -11,6 +11,7 @@

		struct super_block;
		struct file_system_type;
		struct iomap;
		struct linux_binprm;
		struct path;
		struct mount;
		@@ -39,6 +40,8 @@ static inline int __sync_blockdev(struct block_device *bdev, int wait)
		* buffer.c
		*/
		extern void guard_bio_eod(int rw, struct bio *bio);
		/* Exactly one of @get_block / @iomap is expected to be non-NULL. */
		extern int __block_write_begin_int(struct page *page, loff_t pos, unsigned len,
				get_block_t *get_block, struct iomap *iomap);

		/*
		* char_dev.c

fs/iomap.c

0 → 100644

+497 −0

Original line number	Diff line number	Diff line
		/*
		* Copyright (C) 2010 Red Hat, Inc.
		* Copyright (c) 2016 Christoph Hellwig.
		*
		* This program is free software; you can redistribute it and/or modify it
		* under the terms and conditions of the GNU General Public License,
		* version 2, as published by the Free Software Foundation.
		*
		* This program is distributed in the hope it will be useful, but WITHOUT
		* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
		* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
		* more details.
		*/
		#include <linux/module.h>
		#include <linux/compiler.h>
		#include <linux/fs.h>
		#include <linux/iomap.h>
		#include <linux/uaccess.h>
		#include <linux/gfp.h>
		#include <linux/mm.h>
		#include <linux/swap.h>
		#include <linux/pagemap.h>
		#include <linux/file.h>
		#include <linux/uio.h>
		#include <linux/backing-dev.h>
		#include <linux/buffer_head.h>
		#include <linux/dax.h>
		#include "internal.h"

		typedef loff_t (iomap_actor_t)(struct inode inode, loff_t pos, loff_t len,
		void data, struct iomap iomap);

		/*
		 * Execute an iomap operation on a segment of the mapping that spans a
		 * contiguous range of pages that have identical block mapping state.
		 *
		 * This avoids the need to map pages individually, do individual allocations
		 * for each page and most importantly avoid the need for filesystem specific
		 * locking per page. Instead, all the operations are amortised over the entire
		 * range of pages. It is assumed that the filesystems will lock whatever
		 * resources they require in the iomap_begin call, and release them in the
		 * iomap_end call.
		 *
		 * Returns the actor's result when it is non-zero, otherwise the result of
		 * ->iomap_end. A failing ->iomap_begin returns its error directly, and a
		 * mapping that starts after @pos yields -EIO.
		 */
		static loff_t
		iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags,
				struct iomap_ops *ops, void *data, iomap_actor_t actor)
		{
			struct iomap iomap = { 0 };
			loff_t written = 0, ret;

			/*
			 * Need to map a range from start position for length bytes. This can
			 * span multiple pages - it is only guaranteed to return a range of a
			 * single type of pages (e.g. all into a hole, all mapped or all
			 * unwritten). Failure at this point has nothing to undo.
			 *
			 * If allocation is required for this range, reserve the space now so
			 * that the allocation is guaranteed to succeed later on. Once we copy
			 * the data into the page cache pages, then we cannot fail otherwise we
			 * expose transient stale data. If the reserve fails, we can safely
			 * back out at this point as there is nothing to undo.
			 */
			ret = ops->iomap_begin(inode, pos, length, flags, &iomap);
			if (ret)
				return ret;
			if (WARN_ON(iomap.offset > pos))
				return -EIO;

			/*
			 * Cut down the length to the one actually provided by the filesystem,
			 * as it might not be able to give us the whole size that we requested.
			 */
			if (iomap.offset + iomap.length < pos + length)
				length = iomap.offset + iomap.length - pos;

			/*
			 * Now that we have guaranteed that the space allocation will succeed,
			 * we can do the copy-in page by page without having to worry about
			 * failures exposing transient data.
			 */
			written = actor(inode, pos, length, data, &iomap);

			/*
			 * Now the data has been copied, commit the range we've copied. This
			 * should not fail unless the filesystem has had a fatal error.
			 * A filesystem with nothing to commit may leave ->iomap_end unset;
			 * ret is still 0 from the successful ->iomap_begin in that case.
			 */
			if (ops->iomap_end) {
				ret = ops->iomap_end(inode, pos, length,
						     written > 0 ? written : 0,
						     flags, &iomap);
			}

			return written ? written : ret;
		}

		/*
		 * Undo page cache instantiation after a failed or short write of @len
		 * bytes at @pos.
		 */
		static void
		iomap_write_failed(struct inode *inode, loff_t pos, unsigned len)
		{
			loff_t isize = i_size_read(inode);
			loff_t end = pos + len;

			/* Nothing reached past EOF, so there is nothing to drop. */
			if (end <= isize)
				return;

			/*
			 * Only truncate newly allocated pages beyond EOF, even if the
			 * write started inside the existing inode size.
			 */
			truncate_pagecache_range(inode, max(pos, isize), end);
		}

		/*
		 * Grab and lock the page cache page covering @pos and prepare its buffers
		 * for a write of @len bytes using @iomap.
		 *
		 * On success returns 0 and stores the locked page in *@pagep. On failure
		 * the page is unlocked and released, *@pagep is set to NULL, anything
		 * instantiated beyond EOF is truncated away, and the error is returned.
		 */
		static int
		iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
				struct page **pagep, struct iomap *iomap)
		{
			pgoff_t index = pos >> PAGE_SHIFT;
			struct page *page;
			int status = 0;

			/* The caller must never ask for more than the mapping covers. */
			BUG_ON(pos + len > iomap->offset + iomap->length);

			page = grab_cache_page_write_begin(inode->i_mapping, index, flags);
			if (!page)
				return -ENOMEM;

			/* NULL get_block selects the iomap path in __block_write_begin_int. */
			status = __block_write_begin_int(page, pos, len, NULL, iomap);
			if (unlikely(status)) {
				unlock_page(page);
				put_page(page);
				page = NULL;

				iomap_write_failed(inode, pos, len);
			}

			*pagep = page;
			return status;
		}

		/*
		 * Finish a write started by iomap_write_begin(): hand the page to
		 * generic_write_end() and clean up past EOF if fewer than @len bytes
		 * were committed. Returns generic_write_end()'s result.
		 */
		static int
		iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
				unsigned copied, struct page *page)
		{
			int done = generic_write_end(NULL, inode->i_mapping, pos, len,
						     copied, page, NULL);

			if (done < len)
				iomap_write_failed(inode, pos, len);

			return done;
		}

		/*
		 * iomap_apply() actor for buffered writes: copy up to @length bytes from
		 * the iov_iter passed in @data into the page cache starting at @pos, one
		 * page at a time, using @iomap to map the buffers.
		 *
		 * Returns the number of bytes written if any were, otherwise the last
		 * status (0 or a negative errno).
		 */
		static loff_t
		iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
				struct iomap *iomap)
		{
			struct iov_iter *i = data;
			long status = 0;
			ssize_t written = 0;
			unsigned int flags = AOP_FLAG_NOFS;

			/*
			 * Copies from kernel address space cannot fail (NFSD is a big user).
			 */
			if (!iter_is_iovec(i))
				flags |= AOP_FLAG_UNINTERRUPTIBLE;

			do {
				struct page *page;
				unsigned long offset;	/* Offset into pagecache page */
				unsigned long bytes;	/* Bytes to write to page */
				size_t copied;		/* Bytes copied from user */

				offset = (pos & (PAGE_SIZE - 1));
				bytes = min_t(unsigned long, PAGE_SIZE - offset,
							iov_iter_count(i));
		again:
				/* Never write past the end of the current mapping. */
				if (bytes > length)
					bytes = length;

				/*
				 * Bring in the user page that we will copy from _first_.
				 * Otherwise there's a nasty deadlock on copying from the
				 * same page as we're writing to, without it being marked
				 * up-to-date.
				 *
				 * Not only is this an optimisation, but it is also required
				 * to check that the address is actually valid, when atomic
				 * usercopies are used, below.
				 */
				if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
					status = -EFAULT;
					break;
				}

				status = iomap_write_begin(inode, pos, bytes, flags, &page,
						iomap);
				if (unlikely(status))
					break;

				if (mapping_writably_mapped(inode->i_mapping))
					flush_dcache_page(page);

				pagefault_disable();
				copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
				pagefault_enable();

				flush_dcache_page(page);
				mark_page_accessed(page);

				status = iomap_write_end(inode, pos, bytes, copied, page);
				if (unlikely(status < 0))
					break;
				copied = status;

				cond_resched();

				iov_iter_advance(i, copied);
				if (unlikely(copied == 0)) {
					/*
					 * If we were unable to copy any data at all, we must
					 * fall back to a single segment length write.
					 *
					 * If we didn't fallback here, we could livelock
					 * because not all segments in the iov can be copied at
					 * once without a pagefault.
					 */
					bytes = min_t(unsigned long, PAGE_SIZE - offset,
							iov_iter_single_seg_count(i));
					goto again;
				}
				pos += copied;
				written += copied;
				length -= copied;

				balance_dirty_pages_ratelimited(inode->i_mapping);
			} while (iov_iter_count(i) && length);

			return written ? written : status;
		}

		ssize_t
		iomap_file_buffered_write(struct kiocb iocb, struct iov_iter iter,
		struct iomap_ops *ops)
		{
		struct inode *inode = iocb->ki_filp->f_mapping->host;
		loff_t pos = iocb->ki_pos, ret = 0, written = 0;

		while (iov_iter_count(iter)) {
		ret = iomap_apply(inode, pos, iov_iter_count(iter),
		IOMAP_WRITE, ops, iter, iomap_write_actor);
		if (ret <= 0)
		break;
		pos += ret;
		written += ret;
		}

		return written ? written : ret;
		}
		EXPORT_SYMBOL_GPL(iomap_file_buffered_write);

		/*
		 * Zero @bytes bytes at in-page @offset of the page cache page covering
		 * @pos, going through the normal write_begin/write_end path.
		 *
		 * Returns iomap_write_end()'s result on success, or the error from
		 * iomap_write_begin().
		 */
		static int iomap_zero(struct inode *inode, loff_t pos, unsigned offset,
				unsigned bytes, struct iomap *iomap)
		{
			struct page *page;
			int status;

			status = iomap_write_begin(inode, pos, bytes,
					AOP_FLAG_UNINTERRUPTIBLE | AOP_FLAG_NOFS, &page, iomap);
			if (status)
				return status;

			zero_user(page, offset, bytes);
			mark_page_accessed(page);

			return iomap_write_end(inode, pos, bytes, bytes, page);
		}

		/*
		 * DAX variant of zeroing: compute the 512-byte sector of the page that
		 * contains @pos within @iomap and let __dax_zero_page_range() zero
		 * @bytes bytes at in-page @offset.
		 */
		static int iomap_dax_zero(loff_t pos, unsigned offset, unsigned bytes,
				struct iomap *iomap)
		{
			sector_t sector;

			/* Start of the extent, then the page's distance into it. */
			sector = iomap->blkno;
			sector += ((pos & ~(PAGE_SIZE - 1)) - iomap->offset) >> 9;

			return __dax_zero_page_range(iomap->bdev, sector, offset, bytes);
		}

		/*
		 * iomap_apply() actor for zeroing: zero @count bytes starting at @pos
		 * within @iomap, page by page. @data optionally points to a bool that is
		 * set once any zeroing was performed.
		 *
		 * Returns the number of bytes handled, or a negative errno.
		 */
		static loff_t
		iomap_zero_range_actor(struct inode *inode, loff_t pos, loff_t count,
				void *data, struct iomap *iomap)
		{
			bool *did_zero = data;
			loff_t written = 0;
			int status;

			/* already zeroed?  we're done. */
			if (iomap->type == IOMAP_HOLE || iomap->type == IOMAP_UNWRITTEN)
				return count;

			do {
				unsigned offset, bytes;

				offset = pos & (PAGE_SIZE - 1); /* Within page */
				/*
				 * Compare as loff_t: truncating count to unsigned would
				 * give 0 bytes for counts that are a multiple of 4GiB and
				 * the loop would never make progress.
				 */
				bytes = min_t(loff_t, PAGE_SIZE - offset, count);

				if (IS_DAX(inode))
					status = iomap_dax_zero(pos, offset, bytes, iomap);
				else
					status = iomap_zero(inode, pos, offset, bytes, iomap);
				if (status < 0)
					return status;

				pos += bytes;
				count -= bytes;
				written += bytes;
				if (did_zero)
					*did_zero = true;
			} while (count > 0);

			return written;
		}

		/*
		 * Zero the byte range [@pos, @pos + @len) of @inode, one mapping at a
		 * time. @did_zero may be NULL; otherwise it is set to true by the actor
		 * when data was actually zeroed.
		 *
		 * Returns 0 once the whole range has been processed, or the first
		 * non-positive result from iomap_apply().
		 */
		int
		iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
				struct iomap_ops *ops)
		{
			loff_t ret;

			while (len > 0) {
				ret = iomap_apply(inode, pos, len, IOMAP_ZERO,
						ops, did_zero, iomap_zero_range_actor);
				if (ret <= 0)
					return ret;

				pos += ret;
				len -= ret;
			}

			return 0;
		}
		EXPORT_SYMBOL_GPL(iomap_zero_range);

		/*
		 * Zero from @pos to the end of the filesystem block containing it.
		 * Does nothing when @pos already sits on a block boundary.
		 *
		 * Returns 0 or the result of iomap_zero_range().
		 */
		int
		iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
				struct iomap_ops *ops)
		{
			unsigned blocksize = (1 << inode->i_blkbits);
			unsigned off = pos & (blocksize - 1);

			/* Block boundary? Nothing to do */
			if (!off)
				return 0;
			return iomap_zero_range(inode, pos, blocksize - off, did_zero, ops);
		}
		EXPORT_SYMBOL_GPL(iomap_truncate_page);

		/*
		 * iomap_apply() actor for page_mkwrite: map the buffers of the page passed
		 * in @data for @length bytes using @iomap and commit them.
		 *
		 * Returns @length on success or the error from __block_write_begin_int().
		 */
		static loff_t
		iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
				void *data, struct iomap *iomap)
		{
			struct page *page = data;
			int ret;

			/* Only the in-page part of pos is passed down. */
			ret = __block_write_begin_int(page, pos & ~PAGE_MASK, length,
					NULL, iomap);
			if (ret)
				return ret;

			block_commit_write(page, 0, length);
			return length;
		}

		int iomap_page_mkwrite(struct vm_area_struct vma, struct vm_fault vmf,
		struct iomap_ops *ops)
		{
		struct page *page = vmf->page;
		struct inode *inode = file_inode(vma->vm_file);
		unsigned long length;
		loff_t offset, size;
		ssize_t ret;

		lock_page(page);
		size = i_size_read(inode);
		if ((page->mapping != inode->i_mapping) \|\|
		(page_offset(page) > size)) {
		/* We overload EFAULT to mean page got truncated */
		ret = -EFAULT;
		goto out_unlock;
		}

		/* page is wholly or partially inside EOF */
		if (((page->index + 1) << PAGE_SHIFT) > size)
		length = size & ~PAGE_MASK;
		else
		length = PAGE_SIZE;

		offset = page_offset(page);
		while (length > 0) {
		ret = iomap_apply(inode, offset, length, IOMAP_WRITE,
		ops, page, iomap_page_mkwrite_actor);
		if (unlikely(ret <= 0))
		goto out_unlock;
		offset += ret;
		length -= ret;
		}

		set_page_dirty(page);
		wait_for_stable_page(page);
		return 0;
		out_unlock:
		unlock_page(page);
		return ret;
		}
		EXPORT_SYMBOL_GPL(iomap_page_mkwrite);

		/*
		 * State carried across iomap_fiemap_actor() calls within one
		 * iomap_fiemap() request.
		 */
		struct fiemap_ctx {
		/* Destination extent info that iomap_to_fiemap() fills. */
		struct fiemap_extent_info *fi;
		/*
		 * Most recent non-hole mapping seen; it is reported one step late so
		 * the final one can be emitted with FIEMAP_EXTENT_LAST.
		 */
		struct iomap prev;
		};

		/*
		 * Report @iomap to user space as one fiemap extent in @fi, OR-ing the
		 * flags implied by the mapping type into @flags. Holes are not reported.
		 *
		 * Returns 0 for a hole, otherwise the result of fiemap_fill_next_extent().
		 */
		static int iomap_to_fiemap(struct fiemap_extent_info *fi,
				struct iomap *iomap, u32 flags)
		{
			u64 phys = 0;

			switch (iomap->type) {
			case IOMAP_HOLE:
				/* skip holes */
				return 0;
			case IOMAP_DELALLOC:
				flags |= FIEMAP_EXTENT_DELALLOC | FIEMAP_EXTENT_UNKNOWN;
				break;
			case IOMAP_UNWRITTEN:
				flags |= FIEMAP_EXTENT_UNWRITTEN;
				break;
			case IOMAP_MAPPED:
				break;
			}

			/* blkno counts 512-byte sectors; widen before converting to bytes. */
			if (iomap->blkno != IOMAP_NULL_BLOCK)
				phys = (u64)iomap->blkno << 9;

			/*
			 * FIEMAP_EXTENT_MERGED is reserved for block-mapped files whose
			 * extents were synthesised; an iomap is a real extent, so it is
			 * not set here.
			 */
			return fiemap_fill_next_extent(fi, iomap->offset, phys,
					iomap->length, flags);
		}

		/*
		 * iomap_apply() actor for fiemap: report the previously remembered
		 * mapping and remember @iomap for the next call. Holes are skipped
		 * without touching the remembered mapping.
		 *
		 * Returns @length to continue, 0 once the extent array is full, or a
		 * negative errno.
		 */
		static loff_t
		iomap_fiemap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
				struct iomap *iomap)
		{
			struct fiemap_ctx *ctx = data;
			loff_t ret = length;

			if (iomap->type == IOMAP_HOLE)
				return length;

			ret = iomap_to_fiemap(ctx->fi, &ctx->prev, 0);
			ctx->prev = *iomap;
			switch (ret) {
			case 0:		/* success */
				return length;
			case 1:		/* extent array full */
				return 0;
			default:
				return ret;
			}
		}

		/*
		 * Generic fiemap implementation: walk [@start, @start + @len) of @inode
		 * mapping by mapping and report each non-hole extent through @fi. The
		 * last extent found is flagged FIEMAP_EXTENT_LAST.
		 *
		 * Returns 0 on success or a negative errno.
		 */
		int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fi,
				loff_t start, loff_t len, struct iomap_ops *ops)
		{
			struct fiemap_ctx ctx;
			loff_t ret;

			memset(&ctx, 0, sizeof(ctx));
			ctx.fi = fi;
			/* A hole as "previous" mapping means nothing is pending yet. */
			ctx.prev.type = IOMAP_HOLE;

			ret = fiemap_check_flags(fi, FIEMAP_FLAG_SYNC);
			if (ret)
				return ret;

			/* Only write back dirty data when the caller asked for it. */
			if (fi->fi_flags & FIEMAP_FLAG_SYNC) {
				ret = filemap_write_and_wait(inode->i_mapping);
				if (ret)
					return ret;
			}

			while (len > 0) {
				ret = iomap_apply(inode, start, len, 0, ops, &ctx,
						iomap_fiemap_actor);
				if (ret < 0)
					return ret;
				if (ret == 0)
					break;

				start += ret;
				len -= ret;
			}

			/* Flush the mapping still held back by the actor. */
			if (ctx.prev.type != IOMAP_HOLE) {
				ret = iomap_to_fiemap(fi, &ctx.prev, FIEMAP_EXTENT_LAST);
				if (ret < 0)
					return ret;
			}

			return 0;
		}
		EXPORT_SYMBOL_GPL(iomap_fiemap);