Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit d15fee81 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull fuse update from Miklos Szeredi:
 "This series adds cached writeback support to fuse, improving write
  throughput"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse:
  fuse: fix "uninitialized variable" warning
  fuse: Turn writeback cache on
  fuse: Fix O_DIRECT operations vs cached writeback misorder
  fuse: fuse_flush() should wait on writeback
  fuse: Implement write_begin/write_end callbacks
  fuse: restructure fuse_readpage()
  fuse: Flush files on wb close
  fuse: Trust kernel i_mtime only
  fuse: Trust kernel i_size only
  fuse: Connection bit for enabling writeback
  fuse: Prepare to handle short reads
  fuse: Linking file to inode helper
parents 56c225fe f3846266
Loading
Loading
Loading
Loading
+3 −2
Original line number Original line Diff line number Diff line
@@ -95,7 +95,7 @@ static ssize_t cuse_read(struct file *file, char __user *buf, size_t count,
	struct iovec iov = { .iov_base = buf, .iov_len = count };
	struct iovec iov = { .iov_base = buf, .iov_len = count };
	struct fuse_io_priv io = { .async = 0, .file = file };
	struct fuse_io_priv io = { .async = 0, .file = file };


	return fuse_direct_io(&io, &iov, 1, count, &pos, 0);
	return fuse_direct_io(&io, &iov, 1, count, &pos, FUSE_DIO_CUSE);
}
}


static ssize_t cuse_write(struct file *file, const char __user *buf,
static ssize_t cuse_write(struct file *file, const char __user *buf,
@@ -109,7 +109,8 @@ static ssize_t cuse_write(struct file *file, const char __user *buf,
	 * No locking or generic_write_checks(), the server is
	 * No locking or generic_write_checks(), the server is
	 * responsible for locking and sanity checks.
	 * responsible for locking and sanity checks.
	 */
	 */
	return fuse_direct_io(&io, &iov, 1, count, &pos, 1);
	return fuse_direct_io(&io, &iov, 1, count, &pos,
			      FUSE_DIO_WRITE | FUSE_DIO_CUSE);
}
}


static int cuse_open(struct inode *inode, struct file *file)
static int cuse_open(struct inode *inode, struct file *file)
+101 −18
Original line number Original line Diff line number Diff line
@@ -839,6 +839,14 @@ static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
			  struct kstat *stat)
			  struct kstat *stat)
{
{
	unsigned int blkbits;
	unsigned int blkbits;
	struct fuse_conn *fc = get_fuse_conn(inode);

	/* see the comment in fuse_change_attributes() */
	if (fc->writeback_cache && S_ISREG(inode->i_mode)) {
		attr->size = i_size_read(inode);
		attr->mtime = inode->i_mtime.tv_sec;
		attr->mtimensec = inode->i_mtime.tv_nsec;
	}


	stat->dev = inode->i_sb->s_dev;
	stat->dev = inode->i_sb->s_dev;
	stat->ino = attr->ino;
	stat->ino = attr->ino;
@@ -1477,12 +1485,16 @@ static long fuse_dir_compat_ioctl(struct file *file, unsigned int cmd,
				 FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR);
				 FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR);
}
}


static bool update_mtime(unsigned ivalid)
static bool update_mtime(unsigned ivalid, bool trust_local_mtime)
{
{
	/* Always update if mtime is explicitly set  */
	/* Always update if mtime is explicitly set  */
	if (ivalid & ATTR_MTIME_SET)
	if (ivalid & ATTR_MTIME_SET)
		return true;
		return true;


	/* Or if kernel i_mtime is the official one */
	if (trust_local_mtime)
		return true;

	/* If it's an open(O_TRUNC) or an ftruncate(), don't update */
	/* If it's an open(O_TRUNC) or an ftruncate(), don't update */
	if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE)))
	if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE)))
		return false;
		return false;
@@ -1491,7 +1503,8 @@ static bool update_mtime(unsigned ivalid)
	return true;
	return true;
}
}


static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg)
static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg,
			   bool trust_local_mtime)
{
{
	unsigned ivalid = iattr->ia_valid;
	unsigned ivalid = iattr->ia_valid;


@@ -1510,11 +1523,11 @@ static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg)
		if (!(ivalid & ATTR_ATIME_SET))
		if (!(ivalid & ATTR_ATIME_SET))
			arg->valid |= FATTR_ATIME_NOW;
			arg->valid |= FATTR_ATIME_NOW;
	}
	}
	if ((ivalid & ATTR_MTIME) && update_mtime(ivalid)) {
	if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, trust_local_mtime)) {
		arg->valid |= FATTR_MTIME;
		arg->valid |= FATTR_MTIME;
		arg->mtime = iattr->ia_mtime.tv_sec;
		arg->mtime = iattr->ia_mtime.tv_sec;
		arg->mtimensec = iattr->ia_mtime.tv_nsec;
		arg->mtimensec = iattr->ia_mtime.tv_nsec;
		if (!(ivalid & ATTR_MTIME_SET))
		if (!(ivalid & ATTR_MTIME_SET) && !trust_local_mtime)
			arg->valid |= FATTR_MTIME_NOW;
			arg->valid |= FATTR_MTIME_NOW;
	}
	}
}
}
@@ -1563,6 +1576,63 @@ void fuse_release_nowrite(struct inode *inode)
	spin_unlock(&fc->lock);
	spin_unlock(&fc->lock);
}
}


/*
 * Fill in @req as a FUSE_SETATTR request for @inode.
 *
 * The request carries exactly one input argument (*inarg_p) and expects
 * exactly one output argument (*outarg_p).  When fc->minor is below 9 the
 * expected reply size is FUSE_COMPAT_ATTR_OUT_SIZE rather than
 * sizeof(*outarg_p).
 *
 * The caller owns both argument buffers; only pointers to them are stored
 * in the request.
 */
static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_req *req,
			      struct inode *inode,
			      struct fuse_setattr_in *inarg_p,
			      struct fuse_attr_out *outarg_p)
{
	/* Request header */
	req->in.h.opcode = FUSE_SETATTR;
	req->in.h.nodeid = get_node_id(inode);

	/* Single input argument */
	req->in.numargs = 1;
	req->in.args[0].value = inarg_p;
	req->in.args[0].size = sizeof(*inarg_p);

	/* Single output argument; size depends on fc->minor */
	req->out.numargs = 1;
	req->out.args[0].value = outarg_p;
	req->out.args[0].size = (fc->minor < 9) ?
		FUSE_COMPAT_ATTR_OUT_SIZE : sizeof(*outarg_p);
}

/*
 * Send the locally maintained inode->i_mtime to the server with a
 * FUSE_SETATTR request that has only FATTR_MTIME set.
 *
 * @file:   open file; its mapping's host is the inode being flushed
 * @nofail: if true, obtain the request with fuse_get_req_nofail_nopages();
 *          otherwise use fuse_get_req_nopages() and return its error if
 *          the request cannot be obtained
 *
 * Returns the error reported in the reply header (0 on success).  The
 * FUSE_I_MTIME_DIRTY bit is cleared only when the request succeeded.
 */
int fuse_flush_mtime(struct file *file, bool nofail)
{
	struct inode *inode = file->f_mapping->host;
	struct fuse_inode *fi = get_fuse_inode(inode);
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_setattr_in inarg;
	struct fuse_attr_out outarg;
	struct fuse_req *req = NULL;
	int err;

	if (!nofail) {
		req = fuse_get_req_nopages(fc);
		if (IS_ERR(req))
			return PTR_ERR(req);
	} else {
		req = fuse_get_req_nofail_nopages(fc, file);
	}

	memset(&outarg, 0, sizeof(outarg));
	memset(&inarg, 0, sizeof(inarg));

	/* Only the modification time is being set */
	inarg.valid = FATTR_MTIME;
	inarg.mtime = inode->i_mtime.tv_sec;
	inarg.mtimensec = inode->i_mtime.tv_nsec;

	fuse_setattr_fill(fc, req, inode, &inarg, &outarg);
	fuse_request_send(fc, req);
	/* Read the result before dropping our reference to the request */
	err = req->out.h.error;
	fuse_put_request(fc, req);

	if (err)
		return err;

	clear_bit(FUSE_I_MTIME_DIRTY, &fi->state);
	return 0;
}

/*
/*
 * Set attributes, and at the same time refresh them.
 * Set attributes, and at the same time refresh them.
 *
 *
@@ -1580,8 +1650,10 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
	struct fuse_setattr_in inarg;
	struct fuse_setattr_in inarg;
	struct fuse_attr_out outarg;
	struct fuse_attr_out outarg;
	bool is_truncate = false;
	bool is_truncate = false;
	bool is_wb = fc->writeback_cache;
	loff_t oldsize;
	loff_t oldsize;
	int err;
	int err;
	bool trust_local_mtime = is_wb && S_ISREG(inode->i_mode);


	if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS))
	if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS))
		attr->ia_valid |= ATTR_FORCE;
		attr->ia_valid |= ATTR_FORCE;
@@ -1610,7 +1682,7 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,


	memset(&inarg, 0, sizeof(inarg));
	memset(&inarg, 0, sizeof(inarg));
	memset(&outarg, 0, sizeof(outarg));
	memset(&outarg, 0, sizeof(outarg));
	iattr_to_fattr(attr, &inarg);
	iattr_to_fattr(attr, &inarg, trust_local_mtime);
	if (file) {
	if (file) {
		struct fuse_file *ff = file->private_data;
		struct fuse_file *ff = file->private_data;
		inarg.valid |= FATTR_FH;
		inarg.valid |= FATTR_FH;
@@ -1621,17 +1693,7 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
		inarg.valid |= FATTR_LOCKOWNER;
		inarg.valid |= FATTR_LOCKOWNER;
		inarg.lock_owner = fuse_lock_owner_id(fc, current->files);
		inarg.lock_owner = fuse_lock_owner_id(fc, current->files);
	}
	}
	req->in.h.opcode = FUSE_SETATTR;
	fuse_setattr_fill(fc, req, inode, &inarg, &outarg);
	req->in.h.nodeid = get_node_id(inode);
	req->in.numargs = 1;
	req->in.args[0].size = sizeof(inarg);
	req->in.args[0].value = &inarg;
	req->out.numargs = 1;
	if (fc->minor < 9)
		req->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
	else
		req->out.args[0].size = sizeof(outarg);
	req->out.args[0].value = &outarg;
	fuse_request_send(fc, req);
	fuse_request_send(fc, req);
	err = req->out.h.error;
	err = req->out.h.error;
	fuse_put_request(fc, req);
	fuse_put_request(fc, req);
@@ -1648,9 +1710,17 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
	}
	}


	spin_lock(&fc->lock);
	spin_lock(&fc->lock);
	/* the kernel maintains i_mtime locally */
	if (trust_local_mtime && (attr->ia_valid & ATTR_MTIME)) {
		inode->i_mtime = attr->ia_mtime;
		clear_bit(FUSE_I_MTIME_DIRTY, &fi->state);
	}

	fuse_change_attributes_common(inode, &outarg.attr,
	fuse_change_attributes_common(inode, &outarg.attr,
				      attr_timeout(&outarg));
				      attr_timeout(&outarg));
	oldsize = inode->i_size;
	oldsize = inode->i_size;
	/* see the comment in fuse_change_attributes() */
	if (!is_wb || is_truncate || !S_ISREG(inode->i_mode))
		i_size_write(inode, outarg.attr.size);
		i_size_write(inode, outarg.attr.size);


	if (is_truncate) {
	if (is_truncate) {
@@ -1663,7 +1733,8 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
	 * Only call invalidate_inode_pages2() after removing
	 * Only call invalidate_inode_pages2() after removing
	 * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock.
	 * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock.
	 */
	 */
	if (S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
	if ((is_truncate || !is_wb) &&
	    S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
		truncate_pagecache(inode, outarg.attr.size);
		truncate_pagecache(inode, outarg.attr.size);
		invalidate_inode_pages2(inode->i_mapping);
		invalidate_inode_pages2(inode->i_mapping);
	}
	}
@@ -1875,6 +1946,17 @@ static int fuse_removexattr(struct dentry *entry, const char *name)
	return err;
	return err;
}
}


static int fuse_update_time(struct inode *inode, struct timespec *now,
			    int flags)
{
	if (flags & S_MTIME) {
		inode->i_mtime = *now;
		set_bit(FUSE_I_MTIME_DIRTY, &get_fuse_inode(inode)->state);
		BUG_ON(!S_ISREG(inode->i_mode));
	}
	return 0;
}

static const struct inode_operations fuse_dir_inode_operations = {
static const struct inode_operations fuse_dir_inode_operations = {
	.lookup		= fuse_lookup,
	.lookup		= fuse_lookup,
	.mkdir		= fuse_mkdir,
	.mkdir		= fuse_mkdir,
@@ -1914,6 +1996,7 @@ static const struct inode_operations fuse_common_inode_operations = {
	.getxattr	= fuse_getxattr,
	.getxattr	= fuse_getxattr,
	.listxattr	= fuse_listxattr,
	.listxattr	= fuse_listxattr,
	.removexattr	= fuse_removexattr,
	.removexattr	= fuse_removexattr,
	.update_time	= fuse_update_time,
};
};


static const struct inode_operations fuse_symlink_inode_operations = {
static const struct inode_operations fuse_symlink_inode_operations = {
+231 −55
Original line number Original line Diff line number Diff line
@@ -188,6 +188,22 @@ int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
}
}
EXPORT_SYMBOL_GPL(fuse_do_open);
EXPORT_SYMBOL_GPL(fuse_do_open);


/*
 * Chain @file's fuse_file onto the write_files list of its inode.
 *
 * Used when the file may be written through the page cache (a shared
 * writable mmap, or a file opened for write with the writeback cache
 * enabled).  The entry is added only while ff->write_entry is empty, so
 * calling this repeatedly for the same file does not add it twice.  The
 * list is modified under fc->lock.
 */
static void fuse_link_write_file(struct file *file)
{
	struct fuse_file *ff = file->private_data;
	struct inode *inode = file_inode(file);
	struct fuse_inode *fi = get_fuse_inode(inode);
	struct fuse_conn *fc = get_fuse_conn(inode);

	spin_lock(&fc->lock);
	if (list_empty(&ff->write_entry))
		list_add(&ff->write_entry, &fi->write_files);
	spin_unlock(&fc->lock);
}

void fuse_finish_open(struct inode *inode, struct file *file)
void fuse_finish_open(struct inode *inode, struct file *file)
{
{
	struct fuse_file *ff = file->private_data;
	struct fuse_file *ff = file->private_data;
@@ -208,6 +224,8 @@ void fuse_finish_open(struct inode *inode, struct file *file)
		spin_unlock(&fc->lock);
		spin_unlock(&fc->lock);
		fuse_invalidate_attr(inode);
		fuse_invalidate_attr(inode);
	}
	}
	if ((file->f_mode & FMODE_WRITE) && fc->writeback_cache)
		fuse_link_write_file(file);
}
}


int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
@@ -292,6 +310,15 @@ static int fuse_open(struct inode *inode, struct file *file)


static int fuse_release(struct inode *inode, struct file *file)
static int fuse_release(struct inode *inode, struct file *file)
{
{
	struct fuse_conn *fc = get_fuse_conn(inode);

	/* see fuse_vma_close() for !writeback_cache case */
	if (fc->writeback_cache)
		filemap_write_and_wait(file->f_mapping);

	if (test_bit(FUSE_I_MTIME_DIRTY, &get_fuse_inode(inode)->state))
		fuse_flush_mtime(file, true);

	fuse_release_common(file, FUSE_RELEASE);
	fuse_release_common(file, FUSE_RELEASE);


	/* return value is ignored by VFS */
	/* return value is ignored by VFS */
@@ -333,12 +360,13 @@ u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id)
}
}


/*
/*
 * Check if page is under writeback
 * Check if any page in a range is under writeback
 *
 *
 * This is currently done by walking the list of writepage requests
 * This is currently done by walking the list of writepage requests
 * for the inode, which can be pretty inefficient.
 * for the inode, which can be pretty inefficient.
 */
 */
static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index)
static bool fuse_range_is_writeback(struct inode *inode, pgoff_t idx_from,
				   pgoff_t idx_to)
{
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);
@@ -351,8 +379,8 @@ static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index)


		BUG_ON(req->inode != inode);
		BUG_ON(req->inode != inode);
		curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
		curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
		if (curr_index <= index &&
		if (idx_from < curr_index + req->num_pages &&
		    index < curr_index + req->num_pages) {
		    curr_index <= idx_to) {
			found = true;
			found = true;
			break;
			break;
		}
		}
@@ -362,6 +390,11 @@ static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index)
	return found;
	return found;
}
}


/*
 * Check if a single page is under writeback.
 *
 * Convenience wrapper around fuse_range_is_writeback() for the one-page
 * range [index, index].
 */
static inline bool fuse_page_is_writeback(struct inode *inode, pgoff_t index)
{
	return fuse_range_is_writeback(inode, index, index);
}

/*
/*
 * Wait for page writeback to be completed.
 * Wait for page writeback to be completed.
 *
 *
@@ -376,6 +409,21 @@ static int fuse_wait_on_page_writeback(struct inode *inode, pgoff_t index)
	return 0;
	return 0;
}
}


/*
 * Wait for all pending writepages on the inode to finish.
 *
 * This is currently done by blocking further writes with FUSE_NOWRITE
 * and waiting for all sent writes to complete; the block is then lifted
 * again right away.
 *
 * This must be called under i_mutex, otherwise the FUSE_NOWRITE usage
 * could conflict with truncation.
 */
static void fuse_sync_writes(struct inode *inode)
{
	/* Block new writes and wait for the ones already sent... */
	fuse_set_nowrite(inode);
	/* ...then immediately allow writes again. */
	fuse_release_nowrite(inode);
}

static int fuse_flush(struct file *file, fl_owner_t id)
static int fuse_flush(struct file *file, fl_owner_t id)
{
{
	struct inode *inode = file_inode(file);
	struct inode *inode = file_inode(file);
@@ -391,6 +439,14 @@ static int fuse_flush(struct file *file, fl_owner_t id)
	if (fc->no_flush)
	if (fc->no_flush)
		return 0;
		return 0;


	err = filemap_write_and_wait(file->f_mapping);
	if (err)
		return err;

	mutex_lock(&inode->i_mutex);
	fuse_sync_writes(inode);
	mutex_unlock(&inode->i_mutex);

	req = fuse_get_req_nofail_nopages(fc, file);
	req = fuse_get_req_nofail_nopages(fc, file);
	memset(&inarg, 0, sizeof(inarg));
	memset(&inarg, 0, sizeof(inarg));
	inarg.fh = ff->fh;
	inarg.fh = ff->fh;
@@ -411,21 +467,6 @@ static int fuse_flush(struct file *file, fl_owner_t id)
	return err;
	return err;
}
}


/*
 * Wait for all pending writepages on the inode to finish.
 *
 * This is currently done by blocking further writes with FUSE_NOWRITE
 * and waiting for all sent writes to complete.
 *
 * This must be called under i_mutex, otherwise the FUSE_NOWRITE usage
 * could conflict with truncation.
 */
static void fuse_sync_writes(struct inode *inode)
{
	fuse_set_nowrite(inode);
	fuse_release_nowrite(inode);
}

int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
		      int datasync, int isdir)
		      int datasync, int isdir)
{
{
@@ -459,6 +500,12 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end,


	fuse_sync_writes(inode);
	fuse_sync_writes(inode);


	if (test_bit(FUSE_I_MTIME_DIRTY, &get_fuse_inode(inode)->state)) {
		int err = fuse_flush_mtime(file, false);
		if (err)
			goto out;
	}

	req = fuse_get_req_nopages(fc);
	req = fuse_get_req_nopages(fc);
	if (IS_ERR(req)) {
	if (IS_ERR(req)) {
		err = PTR_ERR(req);
		err = PTR_ERR(req);
@@ -655,7 +702,33 @@ static void fuse_read_update_size(struct inode *inode, loff_t size,
	spin_unlock(&fc->lock);
	spin_unlock(&fc->lock);
}
}


static int fuse_readpage(struct file *file, struct page *page)
/*
 * Handle a read reply that returned fewer bytes than requested.
 *
 * Without the writeback cache a short read is treated as EOF and the
 * cached file size is updated to the end of the data actually read.
 *
 * With the writeback cache the short read may instead be a hole: data
 * beyond it can still sit in the page cache without having reached the
 * server.  The local size is therefore left alone, and the part of the
 * request's pages that was not filled by the reply is zeroed.
 */
static void fuse_short_read(struct fuse_req *req, struct inode *inode,
			    u64 attr_ver)
{
	size_t num_read = req->out.args[0].size;
	struct fuse_conn *fc = get_fuse_conn(inode);
	size_t off;
	int i;

	if (!fc->writeback_cache) {
		loff_t pos = page_offset(req->pages[0]) + num_read;

		fuse_read_update_size(inode, pos, attr_ver);
		return;
	}

	/*
	 * Zero from the first unread byte: the tail of the partially
	 * filled page first, then every following page in full.
	 */
	off = num_read & (PAGE_CACHE_SIZE - 1);
	for (i = num_read >> PAGE_CACHE_SHIFT; i < req->num_pages; i++) {
		zero_user_segment(req->pages[i], off, PAGE_CACHE_SIZE);
		off = 0;
	}
}

static int fuse_do_readpage(struct file *file, struct page *page)
{
{
	struct fuse_io_priv io = { .async = 0, .file = file };
	struct fuse_io_priv io = { .async = 0, .file = file };
	struct inode *inode = page->mapping->host;
	struct inode *inode = page->mapping->host;
@@ -667,10 +740,6 @@ static int fuse_readpage(struct file *file, struct page *page)
	u64 attr_ver;
	u64 attr_ver;
	int err;
	int err;


	err = -EIO;
	if (is_bad_inode(inode))
		goto out;

	/*
	/*
	 * Page writeback can extend beyond the lifetime of the
	 * Page writeback can extend beyond the lifetime of the
	 * page-cache page, so make sure we read a properly synced
	 * page-cache page, so make sure we read a properly synced
@@ -679,9 +748,8 @@ static int fuse_readpage(struct file *file, struct page *page)
	fuse_wait_on_page_writeback(inode, page->index);
	fuse_wait_on_page_writeback(inode, page->index);


	req = fuse_get_req(fc, 1);
	req = fuse_get_req(fc, 1);
	err = PTR_ERR(req);
	if (IS_ERR(req))
	if (IS_ERR(req))
		goto out;
		return PTR_ERR(req);


	attr_ver = fuse_get_attr_version(fc);
	attr_ver = fuse_get_attr_version(fc);


@@ -692,18 +760,32 @@ static int fuse_readpage(struct file *file, struct page *page)
	req->page_descs[0].length = count;
	req->page_descs[0].length = count;
	num_read = fuse_send_read(req, &io, pos, count, NULL);
	num_read = fuse_send_read(req, &io, pos, count, NULL);
	err = req->out.h.error;
	err = req->out.h.error;
	fuse_put_request(fc, req);


	if (!err) {
	if (!err) {
		/*
		/*
		 * Short read means EOF.  If file size is larger, truncate it
		 * Short read means EOF.  If file size is larger, truncate it
		 */
		 */
		if (num_read < count)
		if (num_read < count)
			fuse_read_update_size(inode, pos + num_read, attr_ver);
			fuse_short_read(req, inode, attr_ver);


		SetPageUptodate(page);
		SetPageUptodate(page);
	}
	}


	fuse_put_request(fc, req);

	return err;
}

static int fuse_readpage(struct file *file, struct page *page)
{
	struct inode *inode = page->mapping->host;
	int err;

	err = -EIO;
	if (is_bad_inode(inode))
		goto out;

	err = fuse_do_readpage(file, page);
	fuse_invalidate_atime(inode);
	fuse_invalidate_atime(inode);
 out:
 out:
	unlock_page(page);
	unlock_page(page);
@@ -726,13 +808,9 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req)
		/*
		/*
		 * Short read means EOF. If file size is larger, truncate it
		 * Short read means EOF. If file size is larger, truncate it
		 */
		 */
		if (!req->out.h.error && num_read < count) {
		if (!req->out.h.error && num_read < count)
			loff_t pos;
			fuse_short_read(req, inode, req->misc.read.attr_ver);


			pos = page_offset(req->pages[0]) + num_read;
			fuse_read_update_size(inode, pos,
					      req->misc.read.attr_ver);
		}
		fuse_invalidate_atime(inode);
		fuse_invalidate_atime(inode);
	}
	}


@@ -922,16 +1000,21 @@ static size_t fuse_send_write(struct fuse_req *req, struct fuse_io_priv *io,
	return req->misc.write.out.size;
	return req->misc.write.out.size;
}
}


void fuse_write_update_size(struct inode *inode, loff_t pos)
bool fuse_write_update_size(struct inode *inode, loff_t pos)
{
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);
	bool ret = false;


	spin_lock(&fc->lock);
	spin_lock(&fc->lock);
	fi->attr_version = ++fc->attr_version;
	fi->attr_version = ++fc->attr_version;
	if (pos > inode->i_size)
	if (pos > inode->i_size) {
		i_size_write(inode, pos);
		i_size_write(inode, pos);
		ret = true;
	}
	spin_unlock(&fc->lock);
	spin_unlock(&fc->lock);

	return ret;
}
}


static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file,
static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file,
@@ -1116,6 +1199,15 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
	struct iov_iter i;
	struct iov_iter i;
	loff_t endbyte = 0;
	loff_t endbyte = 0;


	if (get_fuse_conn(inode)->writeback_cache) {
		/* Update size (EOF optimization) and mode (SUID clearing) */
		err = fuse_update_attributes(mapping->host, NULL, file, NULL);
		if (err)
			return err;

		return generic_file_aio_write(iocb, iov, nr_segs, pos);
	}

	WARN_ON(iocb->ki_pos != pos);
	WARN_ON(iocb->ki_pos != pos);


	ocount = 0;
	ocount = 0;
@@ -1289,13 +1381,18 @@ static inline int fuse_iter_npages(const struct iov_iter *ii_p)


ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov,
ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov,
		       unsigned long nr_segs, size_t count, loff_t *ppos,
		       unsigned long nr_segs, size_t count, loff_t *ppos,
		       int write)
		       int flags)
{
{
	int write = flags & FUSE_DIO_WRITE;
	int cuse = flags & FUSE_DIO_CUSE;
	struct file *file = io->file;
	struct file *file = io->file;
	struct inode *inode = file->f_mapping->host;
	struct fuse_file *ff = file->private_data;
	struct fuse_file *ff = file->private_data;
	struct fuse_conn *fc = ff->fc;
	struct fuse_conn *fc = ff->fc;
	size_t nmax = write ? fc->max_write : fc->max_read;
	size_t nmax = write ? fc->max_write : fc->max_read;
	loff_t pos = *ppos;
	loff_t pos = *ppos;
	pgoff_t idx_from = pos >> PAGE_CACHE_SHIFT;
	pgoff_t idx_to = (pos + count - 1) >> PAGE_CACHE_SHIFT;
	ssize_t res = 0;
	ssize_t res = 0;
	struct fuse_req *req;
	struct fuse_req *req;
	struct iov_iter ii;
	struct iov_iter ii;
@@ -1309,6 +1406,14 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov,
	if (IS_ERR(req))
	if (IS_ERR(req))
		return PTR_ERR(req);
		return PTR_ERR(req);


	if (!cuse && fuse_range_is_writeback(inode, idx_from, idx_to)) {
		if (!write)
			mutex_lock(&inode->i_mutex);
		fuse_sync_writes(inode);
		if (!write)
			mutex_unlock(&inode->i_mutex);
	}

	while (count) {
	while (count) {
		size_t nres;
		size_t nres;
		fl_owner_t owner = current->files;
		fl_owner_t owner = current->files;
@@ -1397,7 +1502,8 @@ static ssize_t __fuse_direct_write(struct fuse_io_priv *io,


	res = generic_write_checks(file, ppos, &count, 0);
	res = generic_write_checks(file, ppos, &count, 0);
	if (!res)
	if (!res)
		res = fuse_direct_io(io, iov, nr_segs, count, ppos, 1);
		res = fuse_direct_io(io, iov, nr_segs, count, ppos,
				     FUSE_DIO_WRITE);


	fuse_invalidate_attr(inode);
	fuse_invalidate_attr(inode);


@@ -1885,6 +1991,77 @@ static int fuse_writepages(struct address_space *mapping,
	return err;
	return err;
}
}


/*
 * ->write_begin() for the writeback cache.
 *
 * Grabs and locks the page cache page covering @pos, waits for any
 * writeback of that page to finish and makes sure the part of the page
 * that will not be overwritten holds valid data.  On success the locked
 * page is returned through *pagep and 0 is returned; on failure the page
 * is unlocked and released and a negative error is returned.
 *
 * It would be worthwhile to make sure that space is reserved on disk for
 * the write, but how to implement that without killing performance needs
 * more thought.
 */
static int fuse_write_begin(struct file *file, struct address_space *mapping,
		loff_t pos, unsigned len, unsigned flags,
		struct page **pagep, void **fsdata)
{
	struct fuse_conn *fc = get_fuse_conn(file->f_dentry->d_inode);
	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
	struct page *page;
	int err;

	WARN_ON(!fc->writeback_cache);

	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page)
		return -ENOMEM;

	fuse_wait_on_page_writeback(mapping->host, page->index);

	/*
	 * Nothing to prepare if the page already holds valid data or the
	 * write covers the whole page.
	 */
	if (!PageUptodate(page) && len != PAGE_CACHE_SIZE) {
		if (i_size_read(mapping->host) <= (pos & PAGE_CACHE_MASK)) {
			/*
			 * The start of this page comes after the end of
			 * the file, so the readpage can be optimized away;
			 * just zero the bytes preceding the write.
			 */
			size_t off = pos & ~PAGE_CACHE_MASK;

			if (off)
				zero_user_segment(page, 0, off);
		} else {
			err = fuse_do_readpage(file, page);
			if (err) {
				unlock_page(page);
				page_cache_release(page);
				return err;
			}
		}
	}

	*pagep = page;
	return 0;
}

/*
 * ->write_end() for the writeback cache.
 *
 * Completes a buffered write of @copied bytes at @pos into @page: makes
 * the page uptodate if it was not already, extends the cached file size
 * if the write went past it, dirties the page, then unlocks and releases
 * it.
 *
 * Returns @copied.
 */
static int fuse_write_end(struct file *file, struct address_space *mapping,
		loff_t pos, unsigned len, unsigned copied,
		struct page *page, void *fsdata)
{
	struct inode *inode = page->mapping->host;

	/*
	 * Haven't copied anything?  Skip zeroing, size extending and
	 * dirtying.  Otherwise a page that is not uptodate and whose write
	 * started on a page boundary would have endoff == 0 below, would
	 * not be zeroed at all, and would still be marked uptodate and
	 * dirty with whatever stale contents it happened to hold.
	 */
	if (!copied)
		goto unlock;

	if (!PageUptodate(page)) {
		/* Zero any unwritten bytes at the end of the page */
		size_t endoff = (pos + copied) & ~PAGE_CACHE_MASK;
		if (endoff)
			zero_user_segment(page, endoff, PAGE_CACHE_SIZE);
		SetPageUptodate(page);
	}

	fuse_write_update_size(inode, pos + copied);
	set_page_dirty(page);

unlock:
	unlock_page(page);
	page_cache_release(page);

	return copied;
}

static int fuse_launder_page(struct page *page)
static int fuse_launder_page(struct page *page)
{
{
	int err = 0;
	int err = 0;
@@ -1946,20 +2123,9 @@ static const struct vm_operations_struct fuse_file_vm_ops = {


static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
{
{
	if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) {
	if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
		struct inode *inode = file_inode(file);
		fuse_link_write_file(file);
		struct fuse_conn *fc = get_fuse_conn(inode);

		struct fuse_inode *fi = get_fuse_inode(inode);
		struct fuse_file *ff = file->private_data;
		/*
		 * file may be written through mmap, so chain it onto the
		 * inodes's write_file list
		 */
		spin_lock(&fc->lock);
		if (list_empty(&ff->write_entry))
			list_add(&ff->write_entry, &fi->write_files);
		spin_unlock(&fc->lock);
	}
	file_accessed(file);
	file_accessed(file);
	vma->vm_ops = &fuse_file_vm_ops;
	vma->vm_ops = &fuse_file_vm_ops;
	return 0;
	return 0;
@@ -2606,7 +2772,7 @@ static void fuse_register_polled_file(struct fuse_conn *fc,
{
{
	spin_lock(&fc->lock);
	spin_lock(&fc->lock);
	if (RB_EMPTY_NODE(&ff->polled_node)) {
	if (RB_EMPTY_NODE(&ff->polled_node)) {
		struct rb_node **link, *parent;
		struct rb_node **link, *uninitialized_var(parent);


		link = fuse_find_polled_node(fc, ff->kh, &parent);
		link = fuse_find_polled_node(fc, ff->kh, &parent);
		BUG_ON(*link);
		BUG_ON(*link);
@@ -2850,8 +3016,16 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
		goto out;
		goto out;


	/* we could have extended the file */
	/* we could have extended the file */
	if (!(mode & FALLOC_FL_KEEP_SIZE))
	if (!(mode & FALLOC_FL_KEEP_SIZE)) {
		fuse_write_update_size(inode, offset + length);
		bool changed = fuse_write_update_size(inode, offset + length);

		if (changed && fc->writeback_cache) {
			struct fuse_inode *fi = get_fuse_inode(inode);

			inode->i_mtime = current_fs_time(inode->i_sb);
			set_bit(FUSE_I_MTIME_DIRTY, &fi->state);
		}
	}


	if (mode & FALLOC_FL_PUNCH_HOLE)
	if (mode & FALLOC_FL_PUNCH_HOLE)
		truncate_pagecache_range(inode, offset, offset + length - 1);
		truncate_pagecache_range(inode, offset, offset + length - 1);
@@ -2915,6 +3089,8 @@ static const struct address_space_operations fuse_file_aops = {
	.set_page_dirty	= __set_page_dirty_nobuffers,
	.set_page_dirty	= __set_page_dirty_nobuffers,
	.bmap		= fuse_bmap,
	.bmap		= fuse_bmap,
	.direct_IO	= fuse_direct_IO,
	.direct_IO	= fuse_direct_IO,
	.write_begin	= fuse_write_begin,
	.write_end	= fuse_write_end,
};
};


void fuse_init_file_inode(struct inode *inode)
void fuse_init_file_inode(struct inode *inode)
+20 −2
Original line number Original line Diff line number Diff line
@@ -119,6 +119,8 @@ enum {
	FUSE_I_INIT_RDPLUS,
	FUSE_I_INIT_RDPLUS,
	/** An operation changing file size is in progress  */
	/** An operation changing file size is in progress  */
	FUSE_I_SIZE_UNSTABLE,
	FUSE_I_SIZE_UNSTABLE,
	/** i_mtime has been updated locally; a flush to userspace needed */
	FUSE_I_MTIME_DIRTY,
};
};


struct fuse_conn;
struct fuse_conn;
@@ -480,6 +482,9 @@ struct fuse_conn {
	/** Set if bdi is valid */
	/** Set if bdi is valid */
	unsigned bdi_initialized:1;
	unsigned bdi_initialized:1;


	/** write-back cache policy (default is write-through) */
	unsigned writeback_cache:1;

	/*
	/*
	 * The following bitfields are only for optimization purposes
	 * The following bitfields are only for optimization purposes
	 * and hence races in setting them will not cause malfunction
	 * and hence races in setting them will not cause malfunction
@@ -863,9 +868,20 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,


int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
		 bool isdir);
		 bool isdir);

/**
 * fuse_direct_io() flags
 */

/** If set, it is WRITE; otherwise - READ */
#define FUSE_DIO_WRITE (1 << 0)

/** CUSE passes fuse_direct_io() a file whose f_mapping->host is not from FUSE */
#define FUSE_DIO_CUSE  (1 << 1)

ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov,
ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov,
		       unsigned long nr_segs, size_t count, loff_t *ppos,
		       unsigned long nr_segs, size_t count, loff_t *ppos,
		       int write);
		       int flags);
long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
		   unsigned int flags);
		   unsigned int flags);
long fuse_ioctl_common(struct file *file, unsigned int cmd,
long fuse_ioctl_common(struct file *file, unsigned int cmd,
@@ -873,7 +889,9 @@ long fuse_ioctl_common(struct file *file, unsigned int cmd,
unsigned fuse_file_poll(struct file *file, poll_table *wait);
unsigned fuse_file_poll(struct file *file, poll_table *wait);
int fuse_dev_release(struct inode *inode, struct file *file);
int fuse_dev_release(struct inode *inode, struct file *file);


void fuse_write_update_size(struct inode *inode, loff_t pos);
bool fuse_write_update_size(struct inode *inode, loff_t pos);

int fuse_flush_mtime(struct file *file, bool nofail);


int fuse_do_setattr(struct inode *inode, struct iattr *attr,
int fuse_do_setattr(struct inode *inode, struct iattr *attr,
		    struct file *file);
		    struct file *file);
+23 −6
Original line number Original line Diff line number Diff line
@@ -170,8 +170,11 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
	inode->i_blocks  = attr->blocks;
	inode->i_blocks  = attr->blocks;
	inode->i_atime.tv_sec   = attr->atime;
	inode->i_atime.tv_sec   = attr->atime;
	inode->i_atime.tv_nsec  = attr->atimensec;
	inode->i_atime.tv_nsec  = attr->atimensec;
	/* mtime from server may be stale due to local buffered write */
	if (!fc->writeback_cache || !S_ISREG(inode->i_mode)) {
		inode->i_mtime.tv_sec   = attr->mtime;
		inode->i_mtime.tv_sec   = attr->mtime;
		inode->i_mtime.tv_nsec  = attr->mtimensec;
		inode->i_mtime.tv_nsec  = attr->mtimensec;
	}
	inode->i_ctime.tv_sec   = attr->ctime;
	inode->i_ctime.tv_sec   = attr->ctime;
	inode->i_ctime.tv_nsec  = attr->ctimensec;
	inode->i_ctime.tv_nsec  = attr->ctimensec;


@@ -197,6 +200,7 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
{
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);
	bool is_wb = fc->writeback_cache;
	loff_t oldsize;
	loff_t oldsize;
	struct timespec old_mtime;
	struct timespec old_mtime;


@@ -211,10 +215,16 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
	fuse_change_attributes_common(inode, attr, attr_valid);
	fuse_change_attributes_common(inode, attr, attr_valid);


	oldsize = inode->i_size;
	oldsize = inode->i_size;
	/*
	 * With writeback_cache enabled, cached writes beyond EOF extend the
	 * local i_size without keeping the userspace server in sync, so the
	 * attr->size coming from the server can be stale and cannot be
	 * trusted.
	 */
	if (!is_wb || !S_ISREG(inode->i_mode))
		i_size_write(inode, attr->size);
		i_size_write(inode, attr->size);
	spin_unlock(&fc->lock);
	spin_unlock(&fc->lock);


	if (S_ISREG(inode->i_mode)) {
	if (!is_wb && S_ISREG(inode->i_mode)) {
		bool inval = false;
		bool inval = false;


		if (oldsize != attr->size) {
		if (oldsize != attr->size) {
@@ -243,6 +253,8 @@ static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr)
{
{
	inode->i_mode = attr->mode & S_IFMT;
	inode->i_mode = attr->mode & S_IFMT;
	inode->i_size = attr->size;
	inode->i_size = attr->size;
	inode->i_mtime.tv_sec  = attr->mtime;
	inode->i_mtime.tv_nsec = attr->mtimensec;
	if (S_ISREG(inode->i_mode)) {
	if (S_ISREG(inode->i_mode)) {
		fuse_init_common(inode);
		fuse_init_common(inode);
		fuse_init_file_inode(inode);
		fuse_init_file_inode(inode);
@@ -289,7 +301,9 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
		return NULL;
		return NULL;


	if ((inode->i_state & I_NEW)) {
	if ((inode->i_state & I_NEW)) {
		inode->i_flags |= S_NOATIME|S_NOCMTIME;
		inode->i_flags |= S_NOATIME;
		if (!fc->writeback_cache || !S_ISREG(inode->i_mode))
			inode->i_flags |= S_NOCMTIME;
		inode->i_generation = generation;
		inode->i_generation = generation;
		inode->i_data.backing_dev_info = &fc->bdi;
		inode->i_data.backing_dev_info = &fc->bdi;
		fuse_init_inode(inode, attr);
		fuse_init_inode(inode, attr);
@@ -873,6 +887,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
			}
			}
			if (arg->flags & FUSE_ASYNC_DIO)
			if (arg->flags & FUSE_ASYNC_DIO)
				fc->async_dio = 1;
				fc->async_dio = 1;
			if (arg->flags & FUSE_WRITEBACK_CACHE)
				fc->writeback_cache = 1;
		} else {
		} else {
			ra_pages = fc->max_read / PAGE_CACHE_SIZE;
			ra_pages = fc->max_read / PAGE_CACHE_SIZE;
			fc->no_lock = 1;
			fc->no_lock = 1;
@@ -900,7 +916,8 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
		FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK |
		FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK |
		FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
		FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
		FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
		FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
		FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO;
		FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO |
		FUSE_WRITEBACK_CACHE;
	req->in.h.opcode = FUSE_INIT;
	req->in.h.opcode = FUSE_INIT;
	req->in.numargs = 1;
	req->in.numargs = 1;
	req->in.args[0].size = sizeof(*arg);
	req->in.args[0].size = sizeof(*arg);
Loading