Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 16432985 authored by Chris Mason
Browse files

Btrfs: Add O_DIRECT read and write (writes == buffered + cache flush)



This adds basic O_DIRECT read and write support.  In the write case, we
just do a normal buffered write followed by a cache flush.  O_DIRECT +
O_SYNC are required to trigger metadata syncs.

In the read case, there is a basic btrfs_get_block call for use by
the generic O_DIRECT code.  This does honor multi-volume mapping rules
but it skips all checksumming.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
parent 85d824c4
Loading
Loading
Loading
Loading
+9 −2
Original line number Diff line number Diff line
@@ -796,8 +796,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
		     PAGE_CACHE_SIZE / (sizeof(struct page *)));
	pinned[0] = NULL;
	pinned[1] = NULL;
	if (file->f_flags & O_DIRECT)
		return -EINVAL;

	pos = *ppos;
	start_pos = pos;
@@ -909,6 +907,15 @@ out_nolock:
				      start_pos, num_written);
		if (err < 0)
			num_written = err;
	} else if (num_written > 0 && (file->f_flags & O_DIRECT)) {
		do_sync_mapping_range(inode->i_mapping, start_pos,
				      start_pos + num_written - 1,
				      SYNC_FILE_RANGE_WRITE |
				      SYNC_FILE_RANGE_WAIT_AFTER);

		invalidate_mapping_pages(inode->i_mapping,
		      start_pos >> PAGE_CACHE_SHIFT,
		     (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT);
	}
	current->backing_dev_info = NULL;
	return num_written ? num_written : err;
+66 −0
Original line number Diff line number Diff line
@@ -2265,6 +2265,70 @@ out:
	return em;
}

/*
 * get_block callback passed to blockdev_direct_IO() for O_DIRECT reads.
 *
 * Translates the file block @iblock of @inode into a device and block
 * number by looking up the extent map and then running the logical
 * address through the volume mapping tree.  Only the first stripe of
 * the mapping is used, and no checksum verification is done here.
 *
 * @inode:     inode being read
 * @iblock:    file block index, in units of (1 << inode->i_blkbits)
 * @bh_result: on entry b_size is the requested length; on a successful
 *             mapping b_blocknr, b_size and b_bdev are filled in and the
 *             buffer is marked mapped
 * @create:    unused
 *
 * Returns 0 when the block was mapped, and also when there is nothing to
 * map (no extent, an extent that does not cover @start, a hole, or a
 * delalloc extent); in those cases @bh_result is left unmapped.  Returns
 * -EINVAL for inline extents and the error code from btrfs_get_extent()
 * if the lookup fails.
 */
static int btrfs_get_block(struct inode *inode, sector_t iblock,
			struct buffer_head *bh_result, int create)
{
	struct extent_map *em;
	u64 start = (u64)iblock << inode->i_blkbits;
	struct btrfs_multi_bio *multi = NULL;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	u64 len;
	u64 logical;
	u64 map_length;
	int ret = 0;

	em = btrfs_get_extent(inode, NULL, 0, start, bh_result->b_size, 0);

	/*
	 * An error pointer is not an extent_map: report the failure to the
	 * caller and do not hand it to free_extent_map().
	 */
	if (IS_ERR(em))
		return PTR_ERR(em);

	/* no extent at all: nothing to map and nothing to release */
	if (!em)
		return 0;

	/* the extent we got back does not actually cover the start offset */
	if (em->start > start || em->start + em->len <= start)
		goto out;

	/* inline extents are rejected by this path */
	if (em->block_start == EXTENT_MAP_INLINE) {
		ret = -EINVAL;
		goto out;
	}

	/* holes and delalloc extents are left unmapped */
	if (em->block_start == EXTENT_MAP_HOLE ||
	    em->block_start == EXTENT_MAP_DELALLOC) {
		goto out;
	}

	/* bytes left in the extent from start, capped to what b_size can hold */
	len = em->start + em->len - start;
	len = min_t(u64, len, INT_LIMIT(typeof(bh_result->b_size)));

	/* logical address = extent's block_start + offset into the extent */
	logical = start - em->start;
	logical = em->block_start + logical;

	map_length = len;
	ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
			      logical, &map_length, &multi, 0);
	BUG_ON(ret);

	/* the mapping may be shorter than the extent; honour the smaller one */
	bh_result->b_blocknr = multi->stripes[0].physical >> inode->i_blkbits;
	bh_result->b_size = min(map_length, len);
	bh_result->b_bdev = multi->stripes[0].dev->bdev;
	set_buffer_mapped(bh_result);
	kfree(multi);
out:
	free_extent_map(em);
	return ret;
}

/*
 * direct_IO address_space operation.
 *
 * Reads are handed to blockdev_direct_IO() with btrfs_get_block as the
 * block mapping callback and no end_io callback.  A request with
 * rw == WRITE is refused here with -EINVAL.
 */
static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
			const struct iovec *iov, loff_t offset,
			unsigned long nr_segs)
{
	struct inode *inode = iocb->ki_filp->f_mapping->host;

	if (rw != WRITE)
		return blockdev_direct_IO(rw, iocb, inode,
					  inode->i_sb->s_bdev, iov, offset,
					  nr_segs, btrfs_get_block, NULL);

	return -EINVAL;
}

static sector_t btrfs_bmap(struct address_space *mapping, sector_t iblock)
{
	return extent_bmap(mapping, iblock, btrfs_get_extent);
@@ -3136,6 +3200,7 @@ out_fail:
	btrfs_throttle(root);
	return err;
}

static int btrfs_permission(struct inode *inode, int mask,
			    struct nameidata *nd)
{
@@ -3193,6 +3258,7 @@ static struct address_space_operations btrfs_aops = {
	.readpages	= btrfs_readpages,
	.sync_page	= block_sync_page,
	.bmap		= btrfs_bmap,
	.direct_IO	= btrfs_direct_IO,
	.invalidatepage = btrfs_invalidatepage,
	.releasepage	= btrfs_releasepage,
	.set_page_dirty	= __set_page_dirty_nobuffers,