
Commit 66e8ac7b authored by Dave Chinner

Merge branch 'xfs-dax-support' into for-next

parents b9a350a1 cbe4dab1
fs/dax.c  +27 −7
@@ -309,14 +309,21 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
  out:
 	i_mmap_unlock_read(mapping);
 
-	if (bh->b_end_io)
-		bh->b_end_io(bh, 1);
-
 	return error;
 }
 
-static int do_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
-			get_block_t get_block)
+/**
+ * __dax_fault - handle a page fault on a DAX file
+ * @vma: The virtual memory area where the fault occurred
+ * @vmf: The description of the fault
+ * @get_block: The filesystem method used to translate file offsets to blocks
+ *
+ * When a page fault occurs, filesystems may call this helper in their
+ * fault handler for DAX files. __dax_fault() assumes the caller has done all
+ * the necessary locking for the page fault to proceed successfully.
+ */
+int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
+			get_block_t get_block, dax_iodone_t complete_unwritten)
 {
 	struct file *file = vma->vm_file;
 	struct address_space *mapping = file->f_mapping;
@@ -417,7 +424,19 @@ static int do_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 		page_cache_release(page);
 	}
 
+	/*
+	 * If we successfully insert the new mapping over an unwritten extent,
+	 * we need to ensure we convert the unwritten extent. If there is an
+	 * error inserting the mapping, the filesystem needs to leave it as
+	 * unwritten to prevent exposure of the stale underlying data to
+	 * userspace, but we still need to call the completion function so
+	 * the private resources on the mapping buffer can be released. We
+	 * indicate what the callback should do via the uptodate variable, same
+	 * as for normal BH based IO completions.
+	 */
 	error = dax_insert_mapping(inode, &bh, vma, vmf);
+	if (buffer_unwritten(&bh))
+		complete_unwritten(&bh, !error);
 
  out:
 	if (error == -ENOMEM)
@@ -434,6 +453,7 @@ static int do_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 	}
 	goto out;
 }
+EXPORT_SYMBOL(__dax_fault);
 
 /**
  * dax_fault - handle a page fault on a DAX file
@@ -445,7 +465,7 @@ static int do_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
  * fault handler for DAX files.
  */
 int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
-			get_block_t get_block)
+	      get_block_t get_block, dax_iodone_t complete_unwritten)
 {
 	int result;
 	struct super_block *sb = file_inode(vma->vm_file)->i_sb;
@@ -454,7 +474,7 @@ int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 		sb_start_pagefault(sb);
 		file_update_time(vma->vm_file);
 	}
-	result = do_dax_fault(vma, vmf, get_block);
+	result = __dax_fault(vma, vmf, get_block, complete_unwritten);
 	if (vmf->flags & FAULT_FLAG_WRITE)
 		sb_end_pagefault(sb);
 
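
The signature change above is the heart of the API: dax_fault() and the unlocked __dax_fault() now take a dax_iodone_t, so the filesystem, not dax.c, decides how to finish unwritten extent conversion. A minimal sketch of a consumer under the new signature (the myfs_* names and bodies are hypothetical; only the dax_fault() prototype and callback convention come from this diff):

    /* hypothetical filesystem glue, sketched against:
     * int dax_fault(struct vm_area_struct *, struct vm_fault *,
     *               get_block_t, dax_iodone_t);
     */
    static void myfs_end_io_unwritten(struct buffer_head *bh, int uptodate)
    {
    	/* invoked by __dax_fault() only for unwritten buffers; uptodate
    	 * follows the usual BH convention: convert the extent on success,
    	 * only release private mapping state on failure */
    }

    static int myfs_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
    {
    	return dax_fault(vma, vmf, myfs_get_block, myfs_end_io_unwritten);
    }
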
fs/ext2/file.c  +2 −2
@@ -28,12 +28,12 @@
 #ifdef CONFIG_FS_DAX
 static int ext2_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
-	return dax_fault(vma, vmf, ext2_get_block);
+	return dax_fault(vma, vmf, ext2_get_block, NULL);
 }
 
 static int ext2_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
-	return dax_mkwrite(vma, vmf, ext2_get_block);
+	return dax_mkwrite(vma, vmf, ext2_get_block, NULL);
 }
 
 static const struct vm_operations_struct ext2_dax_vm_ops = {
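
ext2 can pass NULL here because it never maps unwritten extents, and __dax_fault() only invokes the callback behind a buffer_unwritten() check, so the NULL is never dereferenced:

    	/* from __dax_fault() above: a NULL complete_unwritten is safe for
    	 * filesystems whose get_block never returns unwritten buffers */
    	if (buffer_unwritten(&bh))
    		complete_unwritten(&bh, !error);
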
fs/ext4/file.c  +14 −2
@@ -192,15 +192,27 @@ out:
 }
 
 #ifdef CONFIG_FS_DAX
+static void ext4_end_io_unwritten(struct buffer_head *bh, int uptodate)
+{
+	struct inode *inode = bh->b_assoc_map->host;
+	/* XXX: breaks on 32-bit > 16GB. Is that even supported? */
+	loff_t offset = (loff_t)(uintptr_t)bh->b_private << inode->i_blkbits;
+	int err;
+	if (!uptodate)
+		return;
+	WARN_ON(!buffer_unwritten(bh));
+	err = ext4_convert_unwritten_extents(NULL, inode, offset, bh->b_size);
+}
+
 static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
-	return dax_fault(vma, vmf, ext4_get_block);
+	return dax_fault(vma, vmf, ext4_get_block, ext4_end_io_unwritten);
 					/* Is this the right get_block? */
 }
 
 static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
-	return dax_mkwrite(vma, vmf, ext4_get_block);
+	return dax_mkwrite(vma, vmf, ext4_get_block, ext4_end_io_unwritten);
 }
 
 static const struct vm_operations_struct ext4_dax_vm_ops = {
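
ext4_end_io_unwritten() has to recover the byte offset from the buffer_head alone: _ext4_get_block() (next file) stashes the logical block number in bh->b_private, and the completion shifts it back by i_blkbits. A standalone userspace check of that round-trip, with illustrative values (the in-tree XXX comment flags a 32-bit truncation risk in exactly this cast chain):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
    	unsigned int i_blkbits = 12;		/* 4 KiB blocks */
    	unsigned long iblock = 0x400000UL;	/* faulting logical block */

    	/* store side: bh->b_private = (void *)(unsigned long)iblock */
    	void *b_private = (void *)iblock;

    	/* load side: same expression as ext4_end_io_unwritten() */
    	long long offset = (long long)(uintptr_t)b_private << i_blkbits;

    	printf("iblock %#lx -> byte offset %lld (16 GiB)\n", iblock, offset);
    	return 0;
    }
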
fs/ext4/inode.c  +7 −14
@@ -656,18 +656,6 @@ has_zeroout:
 	return retval;
 }
 
-static void ext4_end_io_unwritten(struct buffer_head *bh, int uptodate)
-{
-	struct inode *inode = bh->b_assoc_map->host;
-	/* XXX: breaks on 32-bit > 16GB. Is that even supported? */
-	loff_t offset = (loff_t)(uintptr_t)bh->b_private << inode->i_blkbits;
-	int err;
-	if (!uptodate)
-		return;
-	WARN_ON(!buffer_unwritten(bh));
-	err = ext4_convert_unwritten_extents(NULL, inode, offset, bh->b_size);
-}
-
 /* Maximum number of blocks we map for direct IO at once. */
 #define DIO_MAX_BLOCKS 4096
 
@@ -705,10 +693,15 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock,
 
 		map_bh(bh, inode->i_sb, map.m_pblk);
 		bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
-		if (IS_DAX(inode) && buffer_unwritten(bh) && !io_end) {
+		if (IS_DAX(inode) && buffer_unwritten(bh)) {
+			/*
+			 * dgc: I suspect unwritten conversion on ext4+DAX is
+			 * fundamentally broken here when there are concurrent
+			 * read/write in progress on this inode.
+			 */
+			WARN_ON_ONCE(io_end);
 			bh->b_assoc_map = inode->i_mapping;
 			bh->b_private = (void *)(unsigned long)iblock;
-			bh->b_end_io = ext4_end_io_unwritten;
 		}
 		if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN)
 			set_buffer_defer_completion(bh);
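
Putting the ext4 hunks together, the DAX write-fault path now threads unwritten extent conversion through the new callback argument instead of bh->b_end_io. A condensed sketch of the resulting call chain (zeroing of new blocks happens inside the DAX fault path, not shown in these hunks):

    /*
     * ext4_dax_fault()
     *   -> dax_fault(vma, vmf, ext4_get_block, ext4_end_io_unwritten)
     *        -> __dax_fault()                // may map an unwritten block
     *             -> dax_insert_mapping()    // install the pte over it
     *             -> complete_unwritten(&bh, !error)
     *                  == ext4_end_io_unwritten(), which converts the
     *                     extent only if the mapping was inserted cleanly
     */
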
fs/xfs/xfs_aops.c  +110 −42
@@ -1349,7 +1349,7 @@ __xfs_get_blocks(
 	sector_t		iblock,
 	struct buffer_head	*bh_result,
 	int			create,
-	int			direct)
+	bool			direct)
 {
 	struct xfs_inode	*ip = XFS_I(inode);
 	struct xfs_mount	*mp = ip->i_mount;
@@ -1414,6 +1414,7 @@ __xfs_get_blocks(
 			if (error)
 				return error;
 			new = 1;
+
 		} else {
 			/*
 			 * Delalloc reservations do not require a transaction,
@@ -1508,49 +1509,29 @@ xfs_get_blocks(
 	struct buffer_head	*bh_result,
 	int			create)
 {
-	return __xfs_get_blocks(inode, iblock, bh_result, create, 0);
+	return __xfs_get_blocks(inode, iblock, bh_result, create, false);
 }
 
-STATIC int
+int
 xfs_get_blocks_direct(
 	struct inode		*inode,
 	sector_t		iblock,
 	struct buffer_head	*bh_result,
 	int			create)
 {
-	return __xfs_get_blocks(inode, iblock, bh_result, create, 1);
+	return __xfs_get_blocks(inode, iblock, bh_result, create, true);
 }
 
-/*
- * Complete a direct I/O write request.
- *
- * The ioend structure is passed from __xfs_get_blocks() to tell us what to do.
- * If no ioend exists (i.e. @private == NULL) then the write IO is an overwrite
- * wholly within the EOF and so there is nothing for us to do. Note that in this
- * case the completion can be called in interrupt context, whereas if we have an
- * ioend we will always be called in task context (i.e. from a workqueue).
- */
-STATIC void
-xfs_end_io_direct_write(
-	struct kiocb		*iocb,
+static void
+__xfs_end_io_direct_write(
+	struct inode		*inode,
+	struct xfs_ioend	*ioend,
 	loff_t			offset,
-	ssize_t			size,
-	void			*private)
+	ssize_t			size)
 {
-	struct inode		*inode = file_inode(iocb->ki_filp);
-	struct xfs_inode	*ip = XFS_I(inode);
-	struct xfs_mount	*mp = ip->i_mount;
-	struct xfs_ioend	*ioend = private;
-
-	trace_xfs_gbmap_direct_endio(ip, offset, size,
-				     ioend ? ioend->io_type : 0, NULL);
-
-	if (!ioend) {
-		ASSERT(offset + size <= i_size_read(inode));
-		return;
-	}
+	struct xfs_mount	*mp = XFS_I(inode)->i_mount;
 
-	if (XFS_FORCED_SHUTDOWN(mp))
+	if (XFS_FORCED_SHUTDOWN(mp) || ioend->io_error)
 		goto out_end_io;
 
 	/*
@@ -1587,10 +1568,10 @@ xfs_end_io_direct_write(
 	 * here can result in EOF moving backwards and Bad Things Happen when
 	 * that occurs.
 	 */
-	spin_lock(&ip->i_flags_lock);
+	spin_lock(&XFS_I(inode)->i_flags_lock);
 	if (offset + size > i_size_read(inode))
 		i_size_write(inode, offset + size);
-	spin_unlock(&ip->i_flags_lock);
+	spin_unlock(&XFS_I(inode)->i_flags_lock);
 
 	/*
 	 * If we are doing an append IO that needs to update the EOF on disk,
@@ -1607,6 +1588,98 @@ out_end_io:
 	return;
 }
 
+/*
+ * Complete a direct I/O write request.
+ *
+ * The ioend structure is passed from __xfs_get_blocks() to tell us what to do.
+ * If no ioend exists (i.e. @private == NULL) then the write IO is an overwrite
+ * wholly within the EOF and so there is nothing for us to do. Note that in this
+ * case the completion can be called in interrupt context, whereas if we have an
+ * ioend we will always be called in task context (i.e. from a workqueue).
+ */
+STATIC void
+xfs_end_io_direct_write(
+	struct kiocb		*iocb,
+	loff_t			offset,
+	ssize_t			size,
+	void			*private)
+{
+	struct inode		*inode = file_inode(iocb->ki_filp);
+	struct xfs_ioend	*ioend = private;
+
+	trace_xfs_gbmap_direct_endio(XFS_I(inode), offset, size,
+				     ioend ? ioend->io_type : 0, NULL);
+
+	if (!ioend) {
+		ASSERT(offset + size <= i_size_read(inode));
+		return;
+	}
+
+	__xfs_end_io_direct_write(inode, ioend, offset, size);
+}
+
+/*
+ * For DAX we need a mapping buffer callback for unwritten extent conversion
+ * when page faults allocate blocks and then zero them. Note that in this
+ * case the mapping indicated by the ioend may extend beyond EOF. We most
+ * definitely do not want to extend EOF here, so we trim back the ioend size to
+ * EOF.
+ */
+#ifdef CONFIG_FS_DAX
+void
+xfs_end_io_dax_write(
+	struct buffer_head	*bh,
+	int			uptodate)
+{
+	struct xfs_ioend	*ioend = bh->b_private;
+	struct inode		*inode = ioend->io_inode;
+	ssize_t			size = ioend->io_size;
+
+	ASSERT(IS_DAX(ioend->io_inode));
+
+	/* if there was an error zeroing, then don't convert it */
+	if (!uptodate)
+		ioend->io_error = -EIO;
+
+	/*
+	 * Trim update to EOF, so we don't extend EOF during unwritten extent
+	 * conversion of partial EOF blocks.
+	 */
+	spin_lock(&XFS_I(inode)->i_flags_lock);
+	if (ioend->io_offset + size > i_size_read(inode))
+		size = i_size_read(inode) - ioend->io_offset;
+	spin_unlock(&XFS_I(inode)->i_flags_lock);
+
+	__xfs_end_io_direct_write(inode, ioend, ioend->io_offset, size);
+
+}
+#else
+void xfs_end_io_dax_write(struct buffer_head *bh, int uptodate) { }
+#endif
+
+static inline ssize_t
+xfs_vm_do_dio(
+	struct inode		*inode,
+	struct kiocb		*iocb,
+	struct iov_iter		*iter,
+	loff_t			offset,
+	void			(*endio)(struct kiocb	*iocb,
+					 loff_t		offset,
+					 ssize_t	size,
+					 void		*private),
+	int			flags)
+{
+	struct block_device	*bdev;
+
+	if (IS_DAX(inode))
+		return dax_do_io(iocb, inode, iter, offset,
+				 xfs_get_blocks_direct, endio, 0);
+
+	bdev = xfs_find_bdev_for_inode(inode);
+	return  __blockdev_direct_IO(iocb, inode, bdev, iter, offset,
+				     xfs_get_blocks_direct, endio, NULL, flags);
+}
+
 STATIC ssize_t
 xfs_vm_direct_IO(
 	struct kiocb		*iocb,
@@ -1614,16 +1687,11 @@ xfs_vm_direct_IO(
 	loff_t			offset)
 {
 	struct inode		*inode = iocb->ki_filp->f_mapping->host;
-	struct block_device	*bdev = xfs_find_bdev_for_inode(inode);
 
-	if (iov_iter_rw(iter) == WRITE) {
-		return __blockdev_direct_IO(iocb, inode, bdev, iter, offset,
-					    xfs_get_blocks_direct,
-					    xfs_end_io_direct_write, NULL,
-					    DIO_ASYNC_EXTEND);
-	}
-	return __blockdev_direct_IO(iocb, inode, bdev, iter, offset,
-				    xfs_get_blocks_direct, NULL, NULL, 0);
+	if (iov_iter_rw(iter) == WRITE)
+		return xfs_vm_do_dio(inode, iocb, iter, offset,
+				     xfs_end_io_direct_write, DIO_ASYNC_EXTEND);
+	return xfs_vm_do_dio(inode, iocb, iter, offset, NULL, 0);
 }
 
 /*
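
The EOF trim in xfs_end_io_dax_write() is worth sanity-checking on its own: a page fault can map a whole block whose tail lies beyond EOF, and converting the full range would otherwise move EOF. A standalone userspace check of the same arithmetic (values illustrative):

    #include <stdio.h>

    int main(void)
    {
    	/* stand-ins for i_size_read(inode), ioend->io_offset, ioend->io_size */
    	long long i_size    = 10000;	/* file size ends mid-block */
    	long long io_offset = 8192;	/* fault mapped the last 4 KiB block */
    	long long size      = 4096;

    	/* the trim from xfs_end_io_dax_write(): never extend EOF during
    	 * unwritten extent conversion of partial EOF blocks */
    	if (io_offset + size > i_size)
    		size = i_size - io_offset;

    	printf("convert [%lld, %lld), EOF stays at %lld\n",
    	       io_offset, io_offset + size, i_size);
    	return 0;
    }
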