Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit a7fa20a5 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull fuse updates from Miklos Szeredi:
 "This adds a ->writepage() implementation to fuse, improving mmaped
  writeout and paving the way for buffered writeback.

  And there's a patch to add a fix minor number for /dev/cuse, similarly
  to /dev/fuse"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse:
  fuse: writepages: protect secondary requests from fuse file release
  fuse: writepages: update bdi writeout when deleting secondary request
  fuse: writepages: crop secondary requests
  fuse: writepages: roll back changes if request not found
  cuse: add fix minor number to /dev/cuse
  fuse: writepage: skip already in flight
  fuse: writepages: handle same page rewrites
  fuse: writepages: fix aggregation
  fuse: fix race in fuse_writepages()
  fuse: Implement writepages callback
  fuse: don't BUG on no write file
  fuse: lock page in mkwrite
  fuse: Prepare to handle multiple pages in writeback
  fuse: Getting file for writeback helper
parents a3012453 ce128de6
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -414,6 +414,7 @@ Your cooperation is appreciated.
		200 = /dev/net/tun	TAP/TUN network device
		201 = /dev/button/gulpb	Transmeta GULP-B buttons
		202 = /dev/emd/ctl	Enhanced Metadisk RAID (EMD) control
		203 = /dev/cuse		Cuse (character device in user-space)
		204 = /dev/video/em8300		EM8300 DVD decoder control
		205 = /dev/video/em8300_mv	EM8300 DVD decoder video
		206 = /dev/video/em8300_ma	EM8300 DVD decoder audio
+4 −1
Original line number Diff line number Diff line
@@ -589,11 +589,14 @@ static struct attribute *cuse_class_dev_attrs[] = {
ATTRIBUTE_GROUPS(cuse_class_dev);

static struct miscdevice cuse_miscdev = {
	.minor		= MISC_DYNAMIC_MINOR,
	.minor		= CUSE_MINOR,
	.name		= "cuse",
	.fops		= &cuse_channel_fops,
};

MODULE_ALIAS_MISCDEV(CUSE_MINOR);
MODULE_ALIAS("devname:cuse");

static int __init cuse_init(void)
{
	int i, rc;
+335 −26
Original line number Diff line number Diff line
@@ -334,7 +334,8 @@ static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index)

		BUG_ON(req->inode != inode);
		curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
		if (curr_index == index) {
		if (curr_index <= index &&
		    index < curr_index + req->num_pages) {
			found = true;
			break;
		}
@@ -1409,7 +1410,12 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf,

static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req)
{
	__free_page(req->pages[0]);
	int i;

	for (i = 0; i < req->num_pages; i++)
		__free_page(req->pages[i]);

	if (req->ff)
		fuse_file_put(req->ff, false);
}

@@ -1418,30 +1424,34 @@ static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
	struct inode *inode = req->inode;
	struct fuse_inode *fi = get_fuse_inode(inode);
	struct backing_dev_info *bdi = inode->i_mapping->backing_dev_info;
	int i;

	list_del(&req->writepages_entry);
	for (i = 0; i < req->num_pages; i++) {
		dec_bdi_stat(bdi, BDI_WRITEBACK);
	dec_zone_page_state(req->pages[0], NR_WRITEBACK_TEMP);
		dec_zone_page_state(req->pages[i], NR_WRITEBACK_TEMP);
		bdi_writeout_inc(bdi);
	}
	wake_up(&fi->page_waitq);
}

/* Called under fc->lock, may release and reacquire it */
static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req)
static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req,
				loff_t size)
__releases(fc->lock)
__acquires(fc->lock)
{
	struct fuse_inode *fi = get_fuse_inode(req->inode);
	loff_t size = i_size_read(req->inode);
	struct fuse_write_in *inarg = &req->misc.write.in;
	__u64 data_size = req->num_pages * PAGE_CACHE_SIZE;

	if (!fc->connected)
		goto out_free;

	if (inarg->offset + PAGE_CACHE_SIZE <= size) {
		inarg->size = PAGE_CACHE_SIZE;
	if (inarg->offset + data_size <= size) {
		inarg->size = data_size;
	} else if (inarg->offset < size) {
		inarg->size = size & (PAGE_CACHE_SIZE - 1);
		inarg->size = size - inarg->offset;
	} else {
		/* Got truncated off completely */
		goto out_free;
@@ -1472,12 +1482,13 @@ __acquires(fc->lock)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);
	size_t crop = i_size_read(inode);
	struct fuse_req *req;

	while (fi->writectr >= 0 && !list_empty(&fi->queued_writes)) {
		req = list_entry(fi->queued_writes.next, struct fuse_req, list);
		list_del_init(&req->list);
		fuse_send_writepage(fc, req);
		fuse_send_writepage(fc, req, crop);
	}
}

@@ -1488,12 +1499,62 @@ static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req)

	mapping_set_error(inode->i_mapping, req->out.h.error);
	spin_lock(&fc->lock);
	while (req->misc.write.next) {
		struct fuse_conn *fc = get_fuse_conn(inode);
		struct fuse_write_in *inarg = &req->misc.write.in;
		struct fuse_req *next = req->misc.write.next;
		req->misc.write.next = next->misc.write.next;
		next->misc.write.next = NULL;
		next->ff = fuse_file_get(req->ff);
		list_add(&next->writepages_entry, &fi->writepages);

		/*
		 * Skip fuse_flush_writepages() to make it easy to crop requests
		 * based on primary request size.
		 *
		 * 1st case (trivial): there are no concurrent activities using
		 * fuse_set/release_nowrite.  Then we're on safe side because
		 * fuse_flush_writepages() would call fuse_send_writepage()
		 * anyway.
		 *
		 * 2nd case: someone called fuse_set_nowrite and it is waiting
		 * now for completion of all in-flight requests.  This happens
		 * rarely and no more than once per page, so this should be
		 * okay.
		 *
		 * 3rd case: someone (e.g. fuse_do_setattr()) is in the middle
		 * of fuse_set_nowrite..fuse_release_nowrite section.  The fact
		 * that fuse_set_nowrite returned implies that all in-flight
		 * requests were completed along with all of their secondary
		 * requests.  Further primary requests are blocked by negative
		 * writectr.  Hence there cannot be any in-flight requests and
		 * no invocations of fuse_writepage_end() while we're in
		 * fuse_set_nowrite..fuse_release_nowrite section.
		 */
		fuse_send_writepage(fc, next, inarg->offset + inarg->size);
	}
	fi->writectr--;
	fuse_writepage_finish(fc, req);
	spin_unlock(&fc->lock);
	fuse_writepage_free(fc, req);
}

static struct fuse_file *fuse_write_file_get(struct fuse_conn *fc,
					     struct fuse_inode *fi)
{
	struct fuse_file *ff = NULL;

	spin_lock(&fc->lock);
	if (!WARN_ON(list_empty(&fi->write_files))) {
		ff = list_entry(fi->write_files.next, struct fuse_file,
				write_entry);
		fuse_file_get(ff);
	}
	spin_unlock(&fc->lock);

	return ff;
}

static int fuse_writepage_locked(struct page *page)
{
	struct address_space *mapping = page->mapping;
@@ -1501,8 +1562,8 @@ static int fuse_writepage_locked(struct page *page)
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);
	struct fuse_req *req;
	struct fuse_file *ff;
	struct page *tmp_page;
	int error = -ENOMEM;

	set_page_writeback(page);

@@ -1515,16 +1576,16 @@ static int fuse_writepage_locked(struct page *page)
	if (!tmp_page)
		goto err_free;

	spin_lock(&fc->lock);
	BUG_ON(list_empty(&fi->write_files));
	ff = list_entry(fi->write_files.next, struct fuse_file, write_entry);
	req->ff = fuse_file_get(ff);
	spin_unlock(&fc->lock);
	error = -EIO;
	req->ff = fuse_write_file_get(fc, fi);
	if (!req->ff)
		goto err_free;

	fuse_write_fill(req, ff, page_offset(page), 0);
	fuse_write_fill(req, req->ff, page_offset(page), 0);

	copy_highpage(tmp_page, page);
	req->misc.write.in.write_flags |= FUSE_WRITE_CACHE;
	req->misc.write.next = NULL;
	req->in.argpages = 1;
	req->num_pages = 1;
	req->pages[0] = tmp_page;
@@ -1550,19 +1611,263 @@ static int fuse_writepage_locked(struct page *page)
	fuse_request_free(req);
err:
	end_page_writeback(page);
	return -ENOMEM;
	return error;
}

static int fuse_writepage(struct page *page, struct writeback_control *wbc)
{
	int err;

	if (fuse_page_is_writeback(page->mapping->host, page->index)) {
		/*
		 * ->writepages() should be called for sync() and friends.  We
		 * should only get here on direct reclaim and then we are
		 * allowed to skip a page which is already in flight
		 */
		WARN_ON(wbc->sync_mode == WB_SYNC_ALL);

		redirty_page_for_writepage(wbc, page);
		return 0;
	}

	err = fuse_writepage_locked(page);
	unlock_page(page);

	return err;
}

struct fuse_fill_wb_data {
	struct fuse_req *req;
	struct fuse_file *ff;
	struct inode *inode;
	struct page **orig_pages;
};

static void fuse_writepages_send(struct fuse_fill_wb_data *data)
{
	struct fuse_req *req = data->req;
	struct inode *inode = data->inode;
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);
	int num_pages = req->num_pages;
	int i;

	req->ff = fuse_file_get(data->ff);
	spin_lock(&fc->lock);
	list_add_tail(&req->list, &fi->queued_writes);
	fuse_flush_writepages(inode);
	spin_unlock(&fc->lock);

	for (i = 0; i < num_pages; i++)
		end_page_writeback(data->orig_pages[i]);
}

static bool fuse_writepage_in_flight(struct fuse_req *new_req,
				     struct page *page)
{
	struct fuse_conn *fc = get_fuse_conn(new_req->inode);
	struct fuse_inode *fi = get_fuse_inode(new_req->inode);
	struct fuse_req *tmp;
	struct fuse_req *old_req;
	bool found = false;
	pgoff_t curr_index;

	BUG_ON(new_req->num_pages != 0);

	spin_lock(&fc->lock);
	list_del(&new_req->writepages_entry);
	list_for_each_entry(old_req, &fi->writepages, writepages_entry) {
		BUG_ON(old_req->inode != new_req->inode);
		curr_index = old_req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
		if (curr_index <= page->index &&
		    page->index < curr_index + old_req->num_pages) {
			found = true;
			break;
		}
	}
	if (!found) {
		list_add(&new_req->writepages_entry, &fi->writepages);
		goto out_unlock;
	}

	new_req->num_pages = 1;
	for (tmp = old_req; tmp != NULL; tmp = tmp->misc.write.next) {
		BUG_ON(tmp->inode != new_req->inode);
		curr_index = tmp->misc.write.in.offset >> PAGE_CACHE_SHIFT;
		if (tmp->num_pages == 1 &&
		    curr_index == page->index) {
			old_req = tmp;
		}
	}

	if (old_req->num_pages == 1 && (old_req->state == FUSE_REQ_INIT ||
					old_req->state == FUSE_REQ_PENDING)) {
		struct backing_dev_info *bdi = page->mapping->backing_dev_info;

		copy_highpage(old_req->pages[0], page);
		spin_unlock(&fc->lock);

		dec_bdi_stat(bdi, BDI_WRITEBACK);
		dec_zone_page_state(page, NR_WRITEBACK_TEMP);
		bdi_writeout_inc(bdi);
		fuse_writepage_free(fc, new_req);
		fuse_request_free(new_req);
		goto out;
	} else {
		new_req->misc.write.next = old_req->misc.write.next;
		old_req->misc.write.next = new_req;
	}
out_unlock:
	spin_unlock(&fc->lock);
out:
	return found;
}

static int fuse_writepages_fill(struct page *page,
		struct writeback_control *wbc, void *_data)
{
	struct fuse_fill_wb_data *data = _data;
	struct fuse_req *req = data->req;
	struct inode *inode = data->inode;
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct page *tmp_page;
	bool is_writeback;
	int err;

	if (!data->ff) {
		err = -EIO;
		data->ff = fuse_write_file_get(fc, get_fuse_inode(inode));
		if (!data->ff)
			goto out_unlock;
	}

	/*
	 * Being under writeback is unlikely but possible.  For example direct
	 * read to an mmaped fuse file will set the page dirty twice; once when
	 * the pages are faulted with get_user_pages(), and then after the read
	 * completed.
	 */
	is_writeback = fuse_page_is_writeback(inode, page->index);

	if (req && req->num_pages &&
	    (is_writeback || req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
	     (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_write ||
	     data->orig_pages[req->num_pages - 1]->index + 1 != page->index)) {
		fuse_writepages_send(data);
		data->req = NULL;
	}
	err = -ENOMEM;
	tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
	if (!tmp_page)
		goto out_unlock;

	/*
	 * The page must not be redirtied until the writeout is completed
	 * (i.e. userspace has sent a reply to the write request).  Otherwise
	 * there could be more than one temporary page instance for each real
	 * page.
	 *
	 * This is ensured by holding the page lock in page_mkwrite() while
	 * checking fuse_page_is_writeback().  We already hold the page lock
	 * since clear_page_dirty_for_io() and keep it held until we add the
	 * request to the fi->writepages list and increment req->num_pages.
	 * After this fuse_page_is_writeback() will indicate that the page is
	 * under writeback, so we can release the page lock.
	 */
	if (data->req == NULL) {
		struct fuse_inode *fi = get_fuse_inode(inode);

		err = -ENOMEM;
		req = fuse_request_alloc_nofs(FUSE_MAX_PAGES_PER_REQ);
		if (!req) {
			__free_page(tmp_page);
			goto out_unlock;
		}

		fuse_write_fill(req, data->ff, page_offset(page), 0);
		req->misc.write.in.write_flags |= FUSE_WRITE_CACHE;
		req->misc.write.next = NULL;
		req->in.argpages = 1;
		req->background = 1;
		req->num_pages = 0;
		req->end = fuse_writepage_end;
		req->inode = inode;

		spin_lock(&fc->lock);
		list_add(&req->writepages_entry, &fi->writepages);
		spin_unlock(&fc->lock);

		data->req = req;
	}
	set_page_writeback(page);

	copy_highpage(tmp_page, page);
	req->pages[req->num_pages] = tmp_page;
	req->page_descs[req->num_pages].offset = 0;
	req->page_descs[req->num_pages].length = PAGE_SIZE;

	inc_bdi_stat(page->mapping->backing_dev_info, BDI_WRITEBACK);
	inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP);

	err = 0;
	if (is_writeback && fuse_writepage_in_flight(req, page)) {
		end_page_writeback(page);
		data->req = NULL;
		goto out_unlock;
	}
	data->orig_pages[req->num_pages] = page;

	/*
	 * Protected by fc->lock against concurrent access by
	 * fuse_page_is_writeback().
	 */
	spin_lock(&fc->lock);
	req->num_pages++;
	spin_unlock(&fc->lock);

out_unlock:
	unlock_page(page);

	return err;
}

static int fuse_writepages(struct address_space *mapping,
			   struct writeback_control *wbc)
{
	struct inode *inode = mapping->host;
	struct fuse_fill_wb_data data;
	int err;

	err = -EIO;
	if (is_bad_inode(inode))
		goto out;

	data.inode = inode;
	data.req = NULL;
	data.ff = NULL;

	err = -ENOMEM;
	data.orig_pages = kzalloc(sizeof(struct page *) *
				  FUSE_MAX_PAGES_PER_REQ,
				  GFP_NOFS);
	if (!data.orig_pages)
		goto out;

	err = write_cache_pages(mapping, wbc, fuse_writepages_fill, &data);
	if (data.req) {
		/* Ignore errors if we can write at least one page */
		BUG_ON(!data.req->num_pages);
		fuse_writepages_send(&data);
		err = 0;
	}
	if (data.ff)
		fuse_file_put(data.ff, false);

	kfree(data.orig_pages);
out:
	return err;
}

static int fuse_launder_page(struct page *page)
{
	int err = 0;
@@ -1602,14 +1907,17 @@ static void fuse_vma_close(struct vm_area_struct *vma)
static int fuse_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct page *page = vmf->page;
	/*
	 * Don't use page->mapping as it may become NULL from a
	 * concurrent truncate.
	 */
	struct inode *inode = vma->vm_file->f_mapping->host;
	struct inode *inode = file_inode(vma->vm_file);

	file_update_time(vma->vm_file);
	lock_page(page);
	if (page->mapping != inode->i_mapping) {
		unlock_page(page);
		return VM_FAULT_NOPAGE;
	}

	fuse_wait_on_page_writeback(inode, page->index);
	return 0;
	return VM_FAULT_LOCKED;
}

static const struct vm_operations_struct fuse_file_vm_ops = {
@@ -2581,6 +2889,7 @@ static const struct file_operations fuse_direct_io_file_operations = {
static const struct address_space_operations fuse_file_aops  = {
	.readpage	= fuse_readpage,
	.writepage	= fuse_writepage,
	.writepages	= fuse_writepages,
	.launder_page	= fuse_launder_page,
	.readpages	= fuse_readpages,
	.set_page_dirty	= __set_page_dirty_nobuffers,
+1 −0
Original line number Diff line number Diff line
@@ -321,6 +321,7 @@ struct fuse_req {
		struct {
			struct fuse_write_in in;
			struct fuse_write_out out;
			struct fuse_req *next;
		} write;
		struct fuse_notify_retrieve_in retrieve_in;
		struct fuse_lk_in lk_in;
+1 −0
Original line number Diff line number Diff line
@@ -31,6 +31,7 @@
#define I2O_MINOR		166
#define MICROCODE_MINOR		184
#define TUN_MINOR		200
#define CUSE_MINOR		203
#define MWAVE_MINOR		219	/* ACP/Mwave Modem */
#define MPT_MINOR		220
#define MPT2SAS_MINOR		221