Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 4fc8adcf authored by Linus Torvalds
Browse files
Pull third hunk of vfs changes from Al Viro:
 "This contains the ->direct_IO() changes from Omar + saner
  generic_write_checks() + dealing with fcntl()/{read,write}() races
  (mirroring O_APPEND/O_DIRECT into iocb->ki_flags and instead of
  repeatedly looking at ->f_flags, which can be changed by fcntl(2),
  check ->ki_flags - which cannot) + infrastructure bits for dhowells'
  d_inode annotations + Christoph's switch of /dev/loop to
  vfs_iter_write()"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (30 commits)
  block: loop: switch to VFS ITER_BVEC
  configfs: Fix inconsistent use of file_inode() vs file->f_path.dentry->d_inode
  VFS: Make pathwalk use d_is_reg() rather than S_ISREG()
  VFS: Fix up debugfs to use d_is_dir() in place of S_ISDIR()
  VFS: Combine inode checks with d_is_negative() and d_is_positive() in pathwalk
  NFS: Don't use d_inode as a variable name
  VFS: Impose ordering on accesses of d_inode and d_flags
  VFS: Add owner-filesystem positive/negative dentry checks
  nfs: generic_write_checks() shouldn't be done on swapout...
  ocfs2: use __generic_file_write_iter()
  mirror O_APPEND and O_DIRECT into iocb->ki_flags
  switch generic_write_checks() to iocb and iter
  ocfs2: move generic_write_checks() before the alignment checks
  ocfs2_file_write_iter: stop messing with ppos
  udf_file_write_iter: reorder and simplify
  fuse: ->direct_IO() doesn't need generic_write_checks()
  ext4_file_write_iter: move generic_write_checks() up
  xfs_file_aio_write_checks: switch to iocb/iov_iter
  generic_write_checks(): drop isblk argument
  blkdev_write_iter: expand generic_file_checks() call in there
  ...
parents 84588e7a aa4d8616
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -196,7 +196,7 @@ prototypes:
	void (*invalidatepage) (struct page *, unsigned int, unsigned int);
	int (*releasepage) (struct page *, int);
	void (*freepage)(struct page *);
	int (*direct_IO)(int, struct kiocb *, struct iov_iter *iter, loff_t offset);
	int (*direct_IO)(struct kiocb *, struct iov_iter *iter, loff_t offset);
	int (*migratepage)(struct address_space *, struct page *, struct page *);
	int (*launder_page)(struct page *);
	int (*is_partially_uptodate)(struct page *, unsigned long, unsigned long);
+1 −1
Original line number Diff line number Diff line
@@ -590,7 +590,7 @@ struct address_space_operations {
	void (*invalidatepage) (struct page *, unsigned int, unsigned int);
	int (*releasepage) (struct page *, int);
	void (*freepage)(struct page *);
	ssize_t (*direct_IO)(int, struct kiocb *, struct iov_iter *iter, loff_t offset);
	ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter, loff_t offset);
	/* migrate the contents of a page to the specified target */
	int (*migratepage) (struct page *, struct page *);
	int (*launder_page) (struct page *);
+120 −174
Original line number Diff line number Diff line
@@ -88,28 +88,6 @@ static int part_shift;

static struct workqueue_struct *loop_wq;

/*
 * Transfer functions
 */
/*
 * transfer_none - identity "transformation" for an unencrypted loop device.
 *
 * Copies @size bytes between the backing-file page (@raw_page at @raw_off)
 * and the bio page (@loop_page at @loop_off) without modifying the data.
 * @lo, @cmd and @real_block follow the loop transfer-function signature;
 * only @cmd is consulted here to pick the copy direction.
 *
 * Returns 0 (this transfer can never fail).
 */
static int transfer_none(struct loop_device *lo, int cmd,
			 struct page *raw_page, unsigned raw_off,
			 struct page *loop_page, unsigned loop_off,
			 int size, sector_t real_block)
{
	char *raw_buf = kmap_atomic(raw_page) + raw_off;
	char *loop_buf = kmap_atomic(loop_page) + loop_off;

	/* READ: backing file -> bio page; WRITE: bio page -> backing file */
	if (cmd == READ)
		memcpy(loop_buf, raw_buf, size);
	else
		memcpy(raw_buf, loop_buf, size);

	/* Unmap in reverse order of mapping, as kmap_atomic requires. */
	kunmap_atomic(loop_buf);
	kunmap_atomic(raw_buf);
	/* Give the scheduler a chance between segment copies. */
	cond_resched();
	return 0;
}

static int transfer_xor(struct loop_device *lo, int cmd,
			struct page *raw_page, unsigned raw_off,
			struct page *loop_page, unsigned loop_off,
@@ -148,7 +126,6 @@ static int xor_init(struct loop_device *lo, const struct loop_info64 *info)

static struct loop_func_table none_funcs = {
	.number = LO_CRYPT_NONE,
	.transfer = transfer_none,
}; 

static struct loop_func_table xor_funcs = {
@@ -215,207 +192,169 @@ lo_do_transfer(struct loop_device *lo, int cmd,
	       struct page *lpage, unsigned loffs,
	       int size, sector_t rblock)
{
	if (unlikely(!lo->transfer))
	int ret;

	ret = lo->transfer(lo, cmd, rpage, roffs, lpage, loffs, size, rblock);
	if (likely(!ret))
		return 0;

	return lo->transfer(lo, cmd, rpage, roffs, lpage, loffs, size, rblock);
	printk_ratelimited(KERN_ERR
		"loop: Transfer error at byte offset %llu, length %i.\n",
		(unsigned long long)rblock << 9, size);
	return ret;
}

/**
 * __do_lo_send_write - helper for writing data to a loop device
 *
 * This helper just factors out common code between do_lo_send_direct_write()
 * and do_lo_send_write().
 */
static int __do_lo_send_write(struct file *file,
		u8 *buf, const int len, loff_t pos)
static int lo_write_bvec(struct file *file, struct bio_vec *bvec, loff_t *ppos)
{
	struct kvec kvec = {.iov_base = buf, .iov_len = len};
	struct iov_iter from;
	struct iov_iter i;
	ssize_t bw;

	iov_iter_kvec(&from, ITER_KVEC | WRITE, &kvec, 1, len);
	iov_iter_bvec(&i, ITER_BVEC, bvec, 1, bvec->bv_len);

	file_start_write(file);
	bw = vfs_iter_write(file, &from, &pos);
	bw = vfs_iter_write(file, &i, ppos);
	file_end_write(file);
	if (likely(bw == len))

	if (likely(bw ==  bvec->bv_len))
		return 0;
	printk_ratelimited(KERN_ERR "loop: Write error at byte offset %llu, length %i.\n",
			(unsigned long long)pos, len);

	printk_ratelimited(KERN_ERR
		"loop: Write error at byte offset %llu, length %i.\n",
		(unsigned long long)*ppos, bvec->bv_len);
	if (bw >= 0)
		bw = -EIO;
	return bw;
}

/**
 * do_lo_send_direct_write - helper for writing data to a loop device
 *
 * This is the fast, non-transforming version that does not need double
 * buffering.
 */
static int do_lo_send_direct_write(struct loop_device *lo,
		struct bio_vec *bvec, loff_t pos, struct page *page)
static int lo_write_simple(struct loop_device *lo, struct request *rq,
		loff_t pos)
{
	ssize_t bw = __do_lo_send_write(lo->lo_backing_file,
			kmap(bvec->bv_page) + bvec->bv_offset,
			bvec->bv_len, pos);
	kunmap(bvec->bv_page);
	struct bio_vec bvec;
	struct req_iterator iter;
	int ret = 0;

	rq_for_each_segment(bvec, rq, iter) {
		ret = lo_write_bvec(lo->lo_backing_file, &bvec, &pos);
		if (ret < 0)
			break;
		cond_resched();
	return bw;
	}

/**
 * do_lo_send_write - helper for writing data to a loop device
 *
	return ret;
}

/*
 * This is the slow, transforming version that needs to double buffer the
 * data as it cannot do the transformations in place without having direct
 * access to the destination pages of the backing file.
 */
static int do_lo_send_write(struct loop_device *lo, struct bio_vec *bvec,
		loff_t pos, struct page *page)
{
	int ret = lo_do_transfer(lo, WRITE, page, 0, bvec->bv_page,
			bvec->bv_offset, bvec->bv_len, pos >> 9);
	if (likely(!ret))
		return __do_lo_send_write(lo->lo_backing_file,
				page_address(page), bvec->bv_len,
				pos);
	printk_ratelimited(KERN_ERR "loop: Transfer error at byte offset %llu, "
			"length %i.\n", (unsigned long long)pos, bvec->bv_len);
	if (ret > 0)
		ret = -EIO;
	return ret;
}

static int lo_send(struct loop_device *lo, struct request *rq, loff_t pos)
static int lo_write_transfer(struct loop_device *lo, struct request *rq,
		loff_t pos)
{
	int (*do_lo_send)(struct loop_device *, struct bio_vec *, loff_t,
			struct page *page);
	struct bio_vec bvec;
	struct bio_vec bvec, b;
	struct req_iterator iter;
	struct page *page = NULL;
	struct page *page;
	int ret = 0;

	if (lo->transfer != transfer_none) {
		page = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
	page = alloc_page(GFP_NOIO);
	if (unlikely(!page))
			goto fail;
		kmap(page);
		do_lo_send = do_lo_send_write;
	} else {
		do_lo_send = do_lo_send_direct_write;
	}
		return -ENOMEM;

	rq_for_each_segment(bvec, rq, iter) {
		ret = do_lo_send(lo, &bvec, pos, page);
		ret = lo_do_transfer(lo, WRITE, page, 0, bvec.bv_page,
			bvec.bv_offset, bvec.bv_len, pos >> 9);
		if (unlikely(ret))
			break;

		b.bv_page = page;
		b.bv_offset = 0;
		b.bv_len = bvec.bv_len;
		ret = lo_write_bvec(lo->lo_backing_file, &b, &pos);
		if (ret < 0)
			break;
		pos += bvec.bv_len;
	}
	if (page) {
		kunmap(page);

	__free_page(page);
	}
out:
	return ret;
fail:
	printk_ratelimited(KERN_ERR "loop: Failed to allocate temporary page for write.\n");
	ret = -ENOMEM;
	goto out;
}

struct lo_read_data {
	struct loop_device *lo;
	struct page *page;
	unsigned offset;
	int bsize;
};

static int
lo_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
		struct splice_desc *sd)
static int lo_read_simple(struct loop_device *lo, struct request *rq,
		loff_t pos)
{
	struct lo_read_data *p = sd->u.data;
	struct loop_device *lo = p->lo;
	struct page *page = buf->page;
	sector_t IV;
	int size;

	IV = ((sector_t) page->index << (PAGE_CACHE_SHIFT - 9)) +
							(buf->offset >> 9);
	size = sd->len;
	if (size > p->bsize)
		size = p->bsize;
	struct bio_vec bvec;
	struct req_iterator iter;
	struct iov_iter i;
	ssize_t len;

	if (lo_do_transfer(lo, READ, page, buf->offset, p->page, p->offset, size, IV)) {
		printk_ratelimited(KERN_ERR "loop: transfer error block %ld\n",
		       page->index);
		size = -EINVAL;
	}
	rq_for_each_segment(bvec, rq, iter) {
		iov_iter_bvec(&i, ITER_BVEC, &bvec, 1, bvec.bv_len);
		len = vfs_iter_read(lo->lo_backing_file, &i, &pos);
		if (len < 0)
			return len;

	flush_dcache_page(p->page);
		flush_dcache_page(bvec.bv_page);

	if (size > 0)
		p->offset += size;
		if (len != bvec.bv_len) {
			struct bio *bio;

	return size;
			__rq_for_each_bio(bio, rq)
				zero_fill_bio(bio);
			break;
		}
		cond_resched();
	}

static int
lo_direct_splice_actor(struct pipe_inode_info *pipe, struct splice_desc *sd)
{
	return __splice_from_pipe(pipe, sd, lo_splice_actor);
	return 0;
}

static ssize_t
do_lo_receive(struct loop_device *lo,
	      struct bio_vec *bvec, int bsize, loff_t pos)
static int lo_read_transfer(struct loop_device *lo, struct request *rq,
		loff_t pos)
{
	struct lo_read_data cookie;
	struct splice_desc sd;
	struct file *file;
	ssize_t retval;
	struct bio_vec bvec, b;
	struct req_iterator iter;
	struct iov_iter i;
	struct page *page;
	ssize_t len;
	int ret = 0;

	cookie.lo = lo;
	cookie.page = bvec->bv_page;
	cookie.offset = bvec->bv_offset;
	cookie.bsize = bsize;
	page = alloc_page(GFP_NOIO);
	if (unlikely(!page))
		return -ENOMEM;

	sd.len = 0;
	sd.total_len = bvec->bv_len;
	sd.flags = 0;
	sd.pos = pos;
	sd.u.data = &cookie;
	rq_for_each_segment(bvec, rq, iter) {
		loff_t offset = pos;

	file = lo->lo_backing_file;
	retval = splice_direct_to_actor(file, &sd, lo_direct_splice_actor);
		b.bv_page = page;
		b.bv_offset = 0;
		b.bv_len = bvec.bv_len;

	return retval;
		iov_iter_bvec(&i, ITER_BVEC, &b, 1, b.bv_len);
		len = vfs_iter_read(lo->lo_backing_file, &i, &pos);
		if (len < 0) {
			ret = len;
			goto out_free_page;
		}

static int
lo_receive(struct loop_device *lo, struct request *rq, int bsize, loff_t pos)
{
	struct bio_vec bvec;
	struct req_iterator iter;
	ssize_t s;
		ret = lo_do_transfer(lo, READ, page, 0, bvec.bv_page,
			bvec.bv_offset, len, offset >> 9);
		if (ret)
			goto out_free_page;

	rq_for_each_segment(bvec, rq, iter) {
		s = do_lo_receive(lo, &bvec, bsize, pos);
		if (s < 0)
			return s;
		flush_dcache_page(bvec.bv_page);

		if (s != bvec.bv_len) {
		if (len != bvec.bv_len) {
			struct bio *bio;

			__rq_for_each_bio(bio, rq)
				zero_fill_bio(bio);
			break;
		}
		pos += bvec.bv_len;
	}
	return 0;

	ret = 0;
out_free_page:
	__free_page(page);
	return ret;
}

static int lo_discard(struct loop_device *lo, struct request *rq, loff_t pos)
@@ -464,10 +403,17 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq)
			ret = lo_req_flush(lo, rq);
		else if (rq->cmd_flags & REQ_DISCARD)
			ret = lo_discard(lo, rq, pos);
		else if (lo->transfer)
			ret = lo_write_transfer(lo, rq, pos);
		else
			ret = lo_send(lo, rq, pos);
	} else
		ret = lo_receive(lo, rq, lo->lo_blocksize, pos);
			ret = lo_write_simple(lo, rq, pos);

	} else {
		if (lo->transfer)
			ret = lo_read_transfer(lo, rq, pos);
		else
			ret = lo_read_simple(lo, rq, pos);
	}

	return ret;
}
@@ -788,7 +734,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
	lo->lo_device = bdev;
	lo->lo_flags = lo_flags;
	lo->lo_backing_file = file;
	lo->transfer = transfer_none;
	lo->transfer = NULL;
	lo->ioctl = NULL;
	lo->lo_sizelimit = 0;
	lo->old_gfp_mask = mapping_gfp_mask(mapping);
+11 −11
Original line number Diff line number Diff line
@@ -359,8 +359,8 @@ static ssize_t ll_direct_IO_26_seg(const struct lu_env *env, struct cl_io *io,
 * up to 22MB for 128kB kmalloc and up to 682MB for 4MB kmalloc. */
#define MAX_DIO_SIZE ((MAX_MALLOC / sizeof(struct brw_page) * PAGE_CACHE_SIZE) & \
		      ~(DT_MAX_BRW_SIZE - 1))
static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb,
			       struct iov_iter *iter, loff_t file_offset)
static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter,
			       loff_t file_offset)
{
	struct lu_env *env;
	struct cl_io *io;
@@ -399,7 +399,7 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb,
	 *    size changing by concurrent truncates and writes.
	 * 1. Need inode mutex to operate transient pages.
	 */
	if (rw == READ)
	if (iov_iter_rw(iter) == READ)
		mutex_lock(&inode->i_mutex);

	LASSERT(obj->cob_transient_pages == 0);
@@ -408,7 +408,7 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb,
		size_t offs;

		count = min_t(size_t, iov_iter_count(iter), size);
		if (rw == READ) {
		if (iov_iter_rw(iter) == READ) {
			if (file_offset >= i_size_read(inode))
				break;
			if (file_offset + count > i_size_read(inode))
@@ -418,11 +418,11 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb,
		result = iov_iter_get_pages_alloc(iter, &pages, count, &offs);
		if (likely(result > 0)) {
			int n = DIV_ROUND_UP(result + offs, PAGE_SIZE);
			result = ll_direct_IO_26_seg(env, io, rw, inode,
						     file->f_mapping,
						     result, file_offset,
						     pages, n);
			ll_free_user_pages(pages, n, rw==READ);
			result = ll_direct_IO_26_seg(env, io, iov_iter_rw(iter),
						     inode, file->f_mapping,
						     result, file_offset, pages,
						     n);
			ll_free_user_pages(pages, n, iov_iter_rw(iter) == READ);
		}
		if (unlikely(result <= 0)) {
			/* If we can't allocate a large enough buffer
@@ -449,11 +449,11 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb,
	}
out:
	LASSERT(obj->cob_transient_pages == 0);
	if (rw == READ)
	if (iov_iter_rw(iter) == READ)
		mutex_unlock(&inode->i_mutex);

	if (tot_bytes > 0) {
		if (rw == WRITE) {
		if (iov_iter_rw(iter) == WRITE) {
			struct lov_stripe_md *lsm;

			lsm = ccc_inode_lsm_get(inode);
+2 −3
Original line number Diff line number Diff line
@@ -230,7 +230,6 @@ static int v9fs_launder_page(struct page *page)

/**
 * v9fs_direct_IO - 9P address space operation for direct I/O
 * @rw: direction (read or write)
 * @iocb: target I/O control block
 * @iov: array of vectors that define I/O buffer
 * @pos: offset in file to begin the operation
@@ -248,12 +247,12 @@ static int v9fs_launder_page(struct page *page)
 *
 */
static ssize_t
v9fs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t pos)
v9fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t pos)
{
	struct file *file = iocb->ki_filp;
	ssize_t n;
	int err = 0;
	if (rw & WRITE) {
	if (iov_iter_rw(iter) == WRITE) {
		n = p9_client_write(file->private_data, pos, iter, &err);
		if (n) {
			struct inode *inode = file_inode(file);
Loading