Merge branch 'iomap-4.9-dax' into for-next (a1f45e66) · Commits · e / devices / android_kernel_fairphone_FP4

fs/dax.c

+240 −12

Original line number	Diff line number	Diff line
		@@ -31,6 +31,8 @@
		#include <linux/vmstat.h>
		#include <linux/pfn_t.h>
		#include <linux/sizes.h>
		#include <linux/iomap.h>
		#include "internal.h"

		/*
		* We use lowest available bit in exceptional entry for locking, other two
		@@ -580,14 +582,13 @@ static int dax_load_hole(struct address_space mapping, void entry,
		return VM_FAULT_LOCKED;
		}

		static int copy_user_bh(struct page to, struct inode inode,
		struct buffer_head *bh, unsigned long vaddr)
		static int copy_user_dax(struct block_device *bdev, sector_t sector, size_t size,
		struct page *to, unsigned long vaddr)
		{
		struct blk_dax_ctl dax = {
		.sector = to_sector(bh, inode),
		.size = bh->b_size,
		.sector = sector,
		.size = size,
		};
		struct block_device *bdev = bh->b_bdev;
		void *vto;

		if (dax_map_atomic(bdev, &dax) < 0)
		@@ -790,14 +791,13 @@ int dax_writeback_mapping_range(struct address_space *mapping,
		EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);

		static int dax_insert_mapping(struct address_space *mapping,
		struct buffer_head bh, void *entryp,
		struct vm_area_struct vma, struct vm_fault vmf)
		struct block_device *bdev, sector_t sector, size_t size,
		void *entryp, struct vm_area_struct vma, struct vm_fault *vmf)
		{
		unsigned long vaddr = (unsigned long)vmf->virtual_address;
		struct block_device *bdev = bh->b_bdev;
		struct blk_dax_ctl dax = {
		.sector = to_sector(bh, mapping->host),
		.size = bh->b_size,
		.sector = sector,
		.size = size,
		};
		void *ret;
		void entry = entryp;
		@@ -868,7 +868,8 @@ int dax_fault(struct vm_area_struct vma, struct vm_fault vmf,
		if (vmf->cow_page) {
		struct page *new_page = vmf->cow_page;
		if (buffer_written(&bh))
		error = copy_user_bh(new_page, inode, &bh, vaddr);
		error = copy_user_dax(bh.b_bdev, to_sector(&bh, inode),
		bh.b_size, new_page, vaddr);
		else
		clear_user_highpage(new_page, vaddr);
		if (error)
		@@ -898,7 +899,8 @@ int dax_fault(struct vm_area_struct vma, struct vm_fault vmf,

		/* Filesystem should not return unwritten buffers to us! */
		WARN_ON_ONCE(buffer_unwritten(&bh) \|\| buffer_new(&bh));
		error = dax_insert_mapping(mapping, &bh, &entry, vma, vmf);
		error = dax_insert_mapping(mapping, bh.b_bdev, to_sector(&bh, inode),
		bh.b_size, &entry, vma, vmf);
		unlock_entry:
		put_locked_mapping_entry(mapping, vmf->pgoff, entry);
		out:
		@@ -1241,3 +1243,229 @@ int dax_truncate_page(struct inode *inode, loff_t from, get_block_t get_block)
		return dax_zero_page_range(inode, from, length, get_block);
		}
		EXPORT_SYMBOL_GPL(dax_truncate_page);

		#ifdef CONFIG_FS_IOMAP
		/*
		 * iomap_dax_actor - copy data between an iov_iter and one iomap extent
		 * @inode:  inode the I/O is performed on
		 * @pos:    file offset at which this extent's I/O starts
		 * @length: number of bytes covered by this call
		 * @data:   the struct iov_iter describing the user buffer
		 * @iomap:  extent mapping supplied by the file system
		 *
		 * Reads are clamped to i_size and satisfied with zeroes when the extent is
		 * a hole or unwritten.  Anything else must be IOMAP_MAPPED; the range is
		 * then mapped with dax_map_atomic() and copied one mapping at a time.
		 *
		 * Returns the number of bytes transferred, or, if nothing was transferred,
		 * 0 or a negative errno.
		 */
		static loff_t
		iomap_dax_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
				struct iomap *iomap)
		{
			struct iov_iter *iter = data;
			loff_t end = pos + length, done = 0;
			ssize_t ret = 0;

			if (iov_iter_rw(iter) == READ) {
				/* Never read past the current end of file. */
				end = min(end, i_size_read(inode));
				if (pos >= end)
					return 0;

				if (iomap->type == IOMAP_HOLE || iomap->type == IOMAP_UNWRITTEN)
					return iov_iter_zero(min(length, end - pos), iter);
			}

			if (WARN_ON_ONCE(iomap->type != IOMAP_MAPPED))
				return -EIO;

			while (pos < end) {
				unsigned offset = pos & (PAGE_SIZE - 1);
				struct blk_dax_ctl dax = { 0 };
				ssize_t map_len;

				/*
				 * Map from the start of the page containing pos; the sector
				 * is derived from the byte distance into the extent (>> 9).
				 */
				dax.sector = iomap->blkno +
					(((pos & PAGE_MASK) - iomap->offset) >> 9);
				dax.size = (length + offset + PAGE_SIZE - 1) & PAGE_MASK;
				map_len = dax_map_atomic(iomap->bdev, &dax);
				if (map_len < 0) {
					ret = map_len;
					break;
				}

				/* Skip the in-page offset and never copy beyond end. */
				dax.addr += offset;
				map_len -= offset;
				if (map_len > end - pos)
					map_len = end - pos;

				if (iov_iter_rw(iter) == WRITE)
					map_len = copy_from_iter_pmem(dax.addr, map_len, iter);
				else
					map_len = copy_to_iter(dax.addr, map_len, iter);
				dax_unmap_atomic(iomap->bdev, &dax);
				if (map_len <= 0) {
					/* A zero-length copy is reported as -EFAULT. */
					ret = map_len ? map_len : -EFAULT;
					break;
				}

				pos += map_len;
				length -= map_len;
				done += map_len;
			}

			/* Partial progress wins over a late error. */
			return done ? done : ret;
		}

		/**
		* iomap_dax_rw - Perform I/O to a DAX file
		* @iocb: The control block for this I/O
		* @iter: The addresses to do I/O from or to
		* @ops: iomap ops passed from the file system
		*
		* This function performs read and write operations to directly mapped
		* persistent memory. The callers needs to take care of read/write exclusion
		* and evicting any page cache pages in the region under I/O.
		*/
		ssize_t
		iomap_dax_rw(struct kiocb iocb, struct iov_iter iter,
		struct iomap_ops *ops)
		{
		struct address_space *mapping = iocb->ki_filp->f_mapping;
		struct inode *inode = mapping->host;
		loff_t pos = iocb->ki_pos, ret = 0, done = 0;
		unsigned flags = 0;

		if (iov_iter_rw(iter) == WRITE)
		flags \|= IOMAP_WRITE;

		/*
		* Yes, even DAX files can have page cache attached to them: A zeroed
		* page is inserted into the pagecache when we have to serve a write
		* fault on a hole. It should never be dirtied and can simply be
		* dropped from the pagecache once we get real data for the page.
		*
		* XXX: This is racy against mmap, and there's nothing we can do about
		* it. We'll eventually need to shift this down even further so that
		* we can check if we allocated blocks over a hole first.
		*/
		if (mapping->nrpages) {
		ret = invalidate_inode_pages2_range(mapping,
		pos >> PAGE_SHIFT,
		(pos + iov_iter_count(iter) - 1) >> PAGE_SHIFT);
		WARN_ON_ONCE(ret);
		}

		while (iov_iter_count(iter)) {
		ret = iomap_apply(inode, pos, iov_iter_count(iter), flags, ops,
		iter, iomap_dax_actor);
		if (ret <= 0)
		break;
		pos += ret;
		done += ret;
		}

		iocb->ki_pos += done;
		return done ? done : ret;
		}
		EXPORT_SYMBOL_GPL(iomap_dax_rw);

		/**
		* iomap_dax_fault - handle a page fault on a DAX file
		* @vma: The virtual memory area where the fault occurred
		* @vmf: The description of the fault
		* @ops: iomap ops passed from the file system
		*
		* When a page fault occurs, filesystems may call this helper in their fault
		* or mkwrite handler for DAX files. Assumes the caller has done all the
		* necessary locking for the page fault to proceed successfully.
		*/
		int iomap_dax_fault(struct vm_area_struct vma, struct vm_fault vmf,
		struct iomap_ops *ops)
		{
		struct address_space *mapping = vma->vm_file->f_mapping;
		struct inode *inode = mapping->host;
		unsigned long vaddr = (unsigned long)vmf->virtual_address;
		loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT;
		sector_t sector;
		struct iomap iomap = { 0 };
		unsigned flags = 0;
		int error, major = 0;
		void *entry;

		/*
		* Check whether offset isn't beyond end of file now. Caller is supposed
		* to hold locks serializing us with truncate / punch hole so this is
		* a reliable test.
		*/
		if (pos >= i_size_read(inode))
		return VM_FAULT_SIGBUS;

		entry = grab_mapping_entry(mapping, vmf->pgoff);
		if (IS_ERR(entry)) {
		error = PTR_ERR(entry);
		goto out;
		}

		if ((vmf->flags & FAULT_FLAG_WRITE) && !vmf->cow_page)
		flags \|= IOMAP_WRITE;

		/*
		* Note that we don't bother to use iomap_apply here: DAX required
		* the file system block size to be equal the page size, which means
		* that we never have to deal with more than a single extent here.
		*/
		error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap);
		if (error)
		goto unlock_entry;
		if (WARN_ON_ONCE(iomap.offset + iomap.length < pos + PAGE_SIZE)) {
		error = -EIO; /* fs corruption? */
		goto unlock_entry;
		}

		sector = iomap.blkno + (((pos & PAGE_MASK) - iomap.offset) >> 9);

		if (vmf->cow_page) {
		switch (iomap.type) {
		case IOMAP_HOLE:
		case IOMAP_UNWRITTEN:
		clear_user_highpage(vmf->cow_page, vaddr);
		break;
		case IOMAP_MAPPED:
		error = copy_user_dax(iomap.bdev, sector, PAGE_SIZE,
		vmf->cow_page, vaddr);
		break;
		default:
		WARN_ON_ONCE(1);
		error = -EIO;
		break;
		}

		if (error)
		goto unlock_entry;
		if (!radix_tree_exceptional_entry(entry)) {
		vmf->page = entry;
		return VM_FAULT_LOCKED;
		}
		vmf->entry = entry;
		return VM_FAULT_DAX_LOCKED;
		}

		switch (iomap.type) {
		case IOMAP_MAPPED:
		if (iomap.flags & IOMAP_F_NEW) {
		count_vm_event(PGMAJFAULT);
		mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
		major = VM_FAULT_MAJOR;
		}
		error = dax_insert_mapping(mapping, iomap.bdev, sector,
		PAGE_SIZE, &entry, vma, vmf);
		break;
		case IOMAP_UNWRITTEN:
		case IOMAP_HOLE:
		if (!(vmf->flags & FAULT_FLAG_WRITE))
		return dax_load_hole(mapping, entry, vmf);
		/FALLTHRU/
		default:
		WARN_ON_ONCE(1);
		error = -EIO;
		break;
		}

		unlock_entry:
		put_locked_mapping_entry(mapping, vmf->pgoff, entry);
		out:
		if (error == -ENOMEM)
		return VM_FAULT_OOM \| major;
		/* -EBUSY is fine, somebody else faulted on the same PTE */
		if (error < 0 && error != -EBUSY)
		return VM_FAULT_SIGBUS \| major;
		return VM_FAULT_NOPAGE \| major;
		}
		EXPORT_SYMBOL_GPL(iomap_dax_fault);
		#endif /* CONFIG_FS_IOMAP */

fs/ext2/Kconfig

+1 −0

Original line number	Diff line number	Diff line
		config EXT2_FS
		tristate "Second extended fs support"
		select FS_IOMAP if FS_DAX
		help
		Ext2 is a standard Linux file system for hard disks.

fs/ext2/ext2.h

+1 −0

Original line number	Diff line number	Diff line
		@@ -814,6 +814,7 @@ extern const struct file_operations ext2_file_operations;
		/* inode.c */
		extern const struct address_space_operations ext2_aops;
		extern const struct address_space_operations ext2_nobh_aops;
		extern struct iomap_ops ext2_iomap_ops;

		/* namei.c */
		extern const struct inode_operations ext2_dir_inode_operations;

fs/ext2/file.c

+69 −7

Original line number	Diff line number	Diff line
		@@ -22,11 +22,59 @@
		#include <linux/pagemap.h>
		#include <linux/dax.h>
		#include <linux/quotaops.h>
		#include <linux/iomap.h>
		#include <linux/uio.h>
		#include "ext2.h"
		#include "xattr.h"
		#include "acl.h"

		#ifdef CONFIG_FS_DAX
		static ssize_t ext2_dax_read_iter(struct kiocb iocb, struct iov_iter to)
		{
		struct inode *inode = iocb->ki_filp->f_mapping->host;
		ssize_t ret;

		if (!iov_iter_count(to))
		return 0; /* skip atime */

		inode_lock_shared(inode);
		ret = iomap_dax_rw(iocb, to, &ext2_iomap_ops);
		inode_unlock_shared(inode);

		file_accessed(iocb->ki_filp);
		return ret;
		}

		static ssize_t ext2_dax_write_iter(struct kiocb iocb, struct iov_iter from)
		{
		struct file *file = iocb->ki_filp;
		struct inode *inode = file->f_mapping->host;
		ssize_t ret;

		inode_lock(inode);
		ret = generic_write_checks(iocb, from);
		if (ret <= 0)
		goto out_unlock;
		ret = file_remove_privs(file);
		if (ret)
		goto out_unlock;
		ret = file_update_time(file);
		if (ret)
		goto out_unlock;

		ret = iomap_dax_rw(iocb, from, &ext2_iomap_ops);
		if (ret > 0 && iocb->ki_pos > i_size_read(inode)) {
		i_size_write(inode, iocb->ki_pos);
		mark_inode_dirty(inode);
		}

		out_unlock:
		inode_unlock(inode);
		if (ret > 0)
		ret = generic_write_sync(iocb, ret);
		return ret;
		}

		/*
		* The lock ordering for ext2 DAX fault paths is:
		*
		@@ -51,7 +99,7 @@ static int ext2_dax_fault(struct vm_area_struct vma, struct vm_fault vmf)
		}
		down_read(&ei->dax_sem);

		ret = dax_fault(vma, vmf, ext2_get_block);
		ret = iomap_dax_fault(vma, vmf, &ext2_iomap_ops);

		up_read(&ei->dax_sem);
		if (vmf->flags & FAULT_FLAG_WRITE)
		@@ -156,14 +204,28 @@ int ext2_fsync(struct file *file, loff_t start, loff_t end, int datasync)
		return ret;
		}

		/*
		* We have mostly NULL's here: the current defaults are ok for
		* the ext2 filesystem.
		*/
		/*
		 * ->read_iter entry point: DAX inodes go through ext2_dax_read_iter() when
		 * CONFIG_FS_DAX is enabled; everything else uses generic_file_read_iter().
		 */
		static ssize_t ext2_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
		{
		#ifdef CONFIG_FS_DAX
			if (IS_DAX(iocb->ki_filp->f_mapping->host))
				return ext2_dax_read_iter(iocb, to);
		#endif
			return generic_file_read_iter(iocb, to);
		}

		/*
		 * ->write_iter entry point: DAX inodes go through ext2_dax_write_iter() when
		 * CONFIG_FS_DAX is enabled; everything else uses generic_file_write_iter().
		 */
		static ssize_t ext2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
		{
		#ifdef CONFIG_FS_DAX
			if (IS_DAX(iocb->ki_filp->f_mapping->host))
				return ext2_dax_write_iter(iocb, from);
		#endif
			return generic_file_write_iter(iocb, from);
		}

		const struct file_operations ext2_file_operations = {
		.llseek = generic_file_llseek,
		.read_iter = generic_file_read_iter,
		.write_iter = generic_file_write_iter,
		.read_iter = ext2_file_read_iter,
		.write_iter = ext2_file_write_iter,
		.unlocked_ioctl = ext2_ioctl,
		#ifdef CONFIG_COMPAT
		.compat_ioctl = ext2_compat_ioctl,

fs/ext2/inode.c

+81 −19

Original line number	Diff line number	Diff line
		@@ -32,6 +32,7 @@
		#include <linux/buffer_head.h>
		#include <linux/mpage.h>
		#include <linux/fiemap.h>
		#include <linux/iomap.h>
		#include <linux/namei.h>
		#include <linux/uio.h>
		#include "ext2.h"
		@@ -618,7 +619,7 @@ static void ext2_splice_branch(struct inode *inode,
		*/
		static int ext2_get_blocks(struct inode *inode,
		sector_t iblock, unsigned long maxblocks,
		struct buffer_head *bh_result,
		u32 bno, bool new, bool *boundary,
		int create)
		{
		int err = -EIO;
		@@ -644,7 +645,6 @@ static int ext2_get_blocks(struct inode *inode,
		/* Simplest case - block found, no allocation needed */
		if (!partial) {
		first_block = le32_to_cpu(chain[depth - 1].key);
		clear_buffer_new(bh_result); /* What's this do? */
		count++;
		/* map more blocks */
		while (count < maxblocks && count <= blocks_to_boundary) {
		@@ -699,7 +699,6 @@ static int ext2_get_blocks(struct inode *inode,
		mutex_unlock(&ei->truncate_mutex);
		if (err)
		goto cleanup;
		clear_buffer_new(bh_result);
		goto got_it;
		}
		}
		@@ -745,15 +744,16 @@ static int ext2_get_blocks(struct inode *inode,
		mutex_unlock(&ei->truncate_mutex);
		goto cleanup;
		}
		} else
		set_buffer_new(bh_result);
		} else {
		*new = true;
		}

		ext2_splice_branch(inode, iblock, partial, indirect_blks, count);
		mutex_unlock(&ei->truncate_mutex);
		got_it:
		map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
		*bno = le32_to_cpu(chain[depth-1].key);
		if (count > blocks_to_boundary)
		set_buffer_boundary(bh_result);
		*boundary = true;
		err = count;
		/* Clean up and exit */
		partial = chain + depth - 1; /* the whole chain */
		@@ -765,19 +765,82 @@ static int ext2_get_blocks(struct inode *inode,
		return err;
		}

		int ext2_get_block(struct inode inode, sector_t iblock, struct buffer_head bh_result, int create)
		int ext2_get_block(struct inode *inode, sector_t iblock,
		struct buffer_head *bh_result, int create)
		{
		unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
		int ret = ext2_get_blocks(inode, iblock, max_blocks,
		bh_result, create);
		if (ret > 0) {
		bool new = false, boundary = false;
		u32 bno;
		int ret;

		ret = ext2_get_blocks(inode, iblock, max_blocks, &bno, &new, &boundary,
		create);
		if (ret <= 0)
		return ret;

		map_bh(bh_result, inode->i_sb, bno);
		bh_result->b_size = (ret << inode->i_blkbits);
		ret = 0;
		if (new)
		set_buffer_new(bh_result);
		if (boundary)
		set_buffer_boundary(bh_result);
		return 0;

		}

		#ifdef CONFIG_FS_DAX
		/*
		 * ext2_iomap_begin - describe the extent backing a byte range
		 * @inode:  inode being mapped
		 * @offset: byte offset of the start of the range
		 * @length: length of the range in bytes
		 * @flags:  IOMAP_* flags; IOMAP_WRITE is forwarded to ext2_get_blocks()
		 *          as its "create" argument
		 * @iomap:  mapping to fill in
		 *
		 * A zero return from ext2_get_blocks() is reported as a one-block
		 * IOMAP_HOLE; a positive return as an IOMAP_MAPPED extent of that many
		 * blocks.  Returns 0 on success or the negative value from
		 * ext2_get_blocks().
		 */
		static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
				unsigned flags, struct iomap *iomap)
		{
			unsigned int blkbits = inode->i_blkbits;
			unsigned long first_block = offset >> blkbits;
			/* Round the byte length up to whole blocks. */
			unsigned long max_blocks = (length + (1 << blkbits) - 1) >> blkbits;
			bool new = false, boundary = false;
			u32 bno;
			int ret;

			ret = ext2_get_blocks(inode, first_block, max_blocks,
					&bno, &new, &boundary, flags & IOMAP_WRITE);
			if (ret < 0)
				return ret;

			iomap->flags = 0;
			iomap->bdev = inode->i_sb->s_bdev;
			iomap->offset = (u64)first_block << blkbits;

			if (ret == 0) {
				iomap->type = IOMAP_HOLE;
				iomap->blkno = IOMAP_NULL_BLOCK;
				iomap->length = 1 << blkbits;
			} else {
				iomap->type = IOMAP_MAPPED;
				/* Convert the file system block number to 512-byte sectors. */
				iomap->blkno = (sector_t)bno << (blkbits - 9);
				iomap->length = (u64)ret << blkbits;
				iomap->flags |= IOMAP_F_MERGED;
			}

			if (new)
				iomap->flags |= IOMAP_F_NEW;
			return 0;
		}

		/*
		 * ext2_iomap_end - finish an iomap operation started by ext2_iomap_begin
		 *
		 * When a write to a mapped extent transferred fewer bytes than requested,
		 * ext2_write_failed() is called with the end of the requested range.
		 * Always returns 0.
		 */
		static int
		ext2_iomap_end(struct inode *inode, loff_t offset, loff_t length,
				ssize_t written, unsigned flags, struct iomap *iomap)
		{
			/* Nothing to undo unless this was a write to a mapped extent. */
			if (iomap->type != IOMAP_MAPPED || !(flags & IOMAP_WRITE))
				return 0;

			/* Short write: report the end of the range that was asked for. */
			if (written < length)
				ext2_write_failed(inode->i_mapping, offset + length);

			return 0;
		}

		/*
		 * iomap callbacks for ext2: ext2_iomap_begin() maps a byte range and
		 * ext2_iomap_end() finishes the operation.  Passed to iomap_dax_rw() and
		 * iomap_dax_fault() by the ext2 DAX read, write and fault paths.
		 */
		struct iomap_ops ext2_iomap_ops = {
		.iomap_begin = ext2_iomap_begin,
		.iomap_end = ext2_iomap_end,
		};
		#endif /* CONFIG_FS_DAX */

		int ext2_fiemap(struct inode inode, struct fiemap_extent_info fieinfo,
		u64 start, u64 len)
		{
		@@ -863,10 +926,9 @@ ext2_direct_IO(struct kiocb iocb, struct iov_iter iter)
		loff_t offset = iocb->ki_pos;
		ssize_t ret;

		if (IS_DAX(inode))
		ret = dax_do_io(iocb, inode, iter, ext2_get_block, NULL,
		DIO_LOCKING);
		else
		if (WARN_ON_ONCE(IS_DAX(inode)))
		return -EIO;

		ret = blockdev_direct_IO(iocb, inode, iter, ext2_get_block);
		if (ret < 0 && iov_iter_rw(iter) == WRITE)
		ext2_write_failed(mapping, offset + count);