Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit da315f6e authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull fuse updates from Miklos Szeredi:
 "The most interesting part of this update is user namespace support,
  mostly done by Eric Biederman. This enables safe unprivileged fuse
  mounts within a user namespace.

  There are also a couple of fixes for bugs found by syzbot and
  miscellaneous fixes and cleanups"

* tag 'fuse-update-4.18' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse:
  fuse: don't keep dead fuse_conn at fuse_fill_super().
  fuse: fix control dir setup and teardown
  fuse: fix congested state leak on aborted connections
  fuse: Allow fully unprivileged mounts
  fuse: Ensure posix acls are translated outside of init_user_ns
  fuse: add writeback documentation
  fuse: honor AT_STATX_FORCE_SYNC
  fuse: honor AT_STATX_DONT_SYNC
  fuse: Restrict allow_other to the superblock's namespace or a descendant
  fuse: Support fuse filesystems outside of init_user_ns
  fuse: Fail all requests with invalid uids or gids
  fuse: Remove the buggy retranslation of pids in fuse_dev_do_read
  fuse: return -ECONNABORTED on /dev/fuse read after abort
  fuse: atomic_o_trunc should truncate pagecache
parents 1c8c5a9d 543b8f86
Loading
Loading
Loading
Loading
+38 −0
Original line number Diff line number Diff line
Fuse supports the following I/O modes:

- direct-io
- cached
  + write-through
  + writeback-cache

The direct-io mode can be selected with the FOPEN_DIRECT_IO flag in the
FUSE_OPEN reply.

In direct-io mode the page cache is completely bypassed for reads and writes.
No read-ahead takes place. Shared mmap is disabled.

In cached mode reads may be satisfied from the page cache, and the kernel may
read ahead to fill the cache.  The cache is always kept consistent after any
writes to the file.  All mmap modes are supported.

The cached mode has two sub modes controlling how writes are handled.  The
write-through mode is the default and is supported on all kernels.  The
writeback-cache mode may be selected by the FUSE_WRITEBACK_CACHE flag in the
FUSE_INIT reply.

In write-through mode each write is immediately sent to userspace as one or more
WRITE requests, and any cached pages are updated as well (previously uncached
pages that are fully written are also cached).  No READ requests are ever sent
for writes, so when an uncached page is partially written, the page is discarded.

In writeback-cache mode (enabled by the FUSE_WRITEBACK_CACHE flag) writes go to
the cache only, which means that the write(2) syscall can often complete very
fast.  Dirty pages are written back implicitly (background writeback or page
reclaim on memory pressure) or explicitly (invoked by close(2), fsync(2) and
when the last reference to the file is released on munmap(2)).  This mode
assumes that all changes to the filesystem go through the FUSE kernel module
(size and atime/ctime/mtime attributes are kept up-to-date by the kernel), so
it's generally not suitable for network filesystems.  If a partial page is
written, then the page needs to be first read from userspace.  This means that
even for files opened with O_WRONLY it is possible that READ requests will be
generated by the kernel.
+2 −2
Original line number Diff line number Diff line
@@ -34,7 +34,7 @@ struct posix_acl *fuse_get_acl(struct inode *inode, int type)
		return ERR_PTR(-ENOMEM);
	size = fuse_getxattr(inode, name, value, PAGE_SIZE);
	if (size > 0)
		acl = posix_acl_from_xattr(&init_user_ns, value, size);
		acl = posix_acl_from_xattr(fc->user_ns, value, size);
	else if ((size == 0) || (size == -ENODATA) ||
		 (size == -EOPNOTSUPP && fc->no_getxattr))
		acl = NULL;
@@ -81,7 +81,7 @@ int fuse_set_acl(struct inode *inode, struct posix_acl *acl, int type)
		if (!value)
			return -ENOMEM;

		ret = posix_acl_to_xattr(&init_user_ns, acl, value, size);
		ret = posix_acl_to_xattr(fc->user_ns, acl, value, size);
		if (ret < 0) {
			kfree(value);
			return ret;
+11 −4
Original line number Diff line number Diff line
@@ -35,7 +35,7 @@ static ssize_t fuse_conn_abort_write(struct file *file, const char __user *buf,
{
	struct fuse_conn *fc = fuse_ctl_file_conn_get(file);
	if (fc) {
		fuse_abort_conn(fc);
		fuse_abort_conn(fc, true);
		fuse_conn_put(fc);
	}
	return count;
@@ -211,10 +211,11 @@ static struct dentry *fuse_ctl_add_dentry(struct dentry *parent,
	if (!dentry)
		return NULL;

	fc->ctl_dentry[fc->ctl_ndents++] = dentry;
	inode = new_inode(fuse_control_sb);
	if (!inode)
	if (!inode) {
		dput(dentry);
		return NULL;
	}

	inode->i_ino = get_next_ino();
	inode->i_mode = mode;
@@ -228,6 +229,9 @@ static struct dentry *fuse_ctl_add_dentry(struct dentry *parent,
	set_nlink(inode, nlink);
	inode->i_private = fc;
	d_add(dentry, inode);

	fc->ctl_dentry[fc->ctl_ndents++] = dentry;

	return dentry;
}

@@ -284,7 +288,10 @@ void fuse_ctl_remove_conn(struct fuse_conn *fc)
	for (i = fc->ctl_ndents - 1; i >= 0; i--) {
		struct dentry *dentry = fc->ctl_dentry[i];
		d_inode(dentry)->i_private = NULL;
		d_drop(dentry);
		if (!i) {
			/* Get rid of submounts: */
			d_invalidate(dentry);
		}
		dput(dentry);
	}
	drop_nlink(d_inode(fuse_control_sb->s_root));
+8 −3
Original line number Diff line number Diff line
@@ -48,6 +48,7 @@
#include <linux/stat.h>
#include <linux/module.h>
#include <linux/uio.h>
#include <linux/user_namespace.h>

#include "fuse_i.h"

@@ -406,7 +407,7 @@ static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
err_region:
	unregister_chrdev_region(devt, 1);
err:
	fuse_abort_conn(fc);
	fuse_abort_conn(fc, false);
	goto out;
}

@@ -498,7 +499,11 @@ static int cuse_channel_open(struct inode *inode, struct file *file)
	if (!cc)
		return -ENOMEM;

	fuse_conn_init(&cc->fc);
	/*
	 * Limit the cuse channel to requests that can
	 * be represented in file->f_cred->user_ns.
	 */
	fuse_conn_init(&cc->fc, file->f_cred->user_ns);

	fud = fuse_dev_alloc(&cc->fc);
	if (!fud) {
@@ -581,7 +586,7 @@ static ssize_t cuse_class_abort_store(struct device *dev,
{
	struct cuse_conn *cc = dev_get_drvdata(dev);

	fuse_abort_conn(&cc->fc);
	fuse_abort_conn(&cc->fc, false);
	return count;
}
static DEVICE_ATTR(abort, 0200, NULL, cuse_class_abort_store);
+21 −22
Original line number Diff line number Diff line
@@ -112,13 +112,6 @@ static void __fuse_put_request(struct fuse_req *req)
	refcount_dec(&req->count);
}

static void fuse_req_init_context(struct fuse_conn *fc, struct fuse_req *req)
{
	req->in.h.uid = from_kuid_munged(&init_user_ns, current_fsuid());
	req->in.h.gid = from_kgid_munged(&init_user_ns, current_fsgid());
	req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns);
}

void fuse_set_initialized(struct fuse_conn *fc)
{
	/* Make sure stores before this are seen on another CPU */
@@ -163,11 +156,19 @@ static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages,
		goto out;
	}

	fuse_req_init_context(fc, req);
	req->in.h.uid = from_kuid(fc->user_ns, current_fsuid());
	req->in.h.gid = from_kgid(fc->user_ns, current_fsgid());
	req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns);

	__set_bit(FR_WAITING, &req->flags);
	if (for_background)
		__set_bit(FR_BACKGROUND, &req->flags);

	if (unlikely(req->in.h.uid == ((uid_t)-1) ||
		     req->in.h.gid == ((gid_t)-1))) {
		fuse_put_request(fc, req);
		return ERR_PTR(-EOVERFLOW);
	}
	return req;

 out:
@@ -256,7 +257,10 @@ struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc,
	if (!req)
		req = get_reserved_req(fc, file);

	fuse_req_init_context(fc, req);
	req->in.h.uid = from_kuid_munged(fc->user_ns, current_fsuid());
	req->in.h.gid = from_kgid_munged(fc->user_ns, current_fsgid());
	req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns);

	__set_bit(FR_WAITING, &req->flags);
	__clear_bit(FR_BACKGROUND, &req->flags);
	return req;
@@ -381,8 +385,7 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req)
		if (!fc->blocked && waitqueue_active(&fc->blocked_waitq))
			wake_up(&fc->blocked_waitq);

		if (fc->num_background == fc->congestion_threshold &&
		    fc->connected && fc->sb) {
		if (fc->num_background == fc->congestion_threshold && fc->sb) {
			clear_bdi_congested(fc->sb->s_bdi, BLK_RW_SYNC);
			clear_bdi_congested(fc->sb->s_bdi, BLK_RW_ASYNC);
		}
@@ -1234,9 +1237,10 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
	if (err)
		goto err_unlock;

	err = -ENODEV;
	if (!fiq->connected)
	if (!fiq->connected) {
		err = (fc->aborted && fc->abort_err) ? -ECONNABORTED : -ENODEV;
		goto err_unlock;
	}

	if (!list_empty(&fiq->interrupts)) {
		req = list_entry(fiq->interrupts.next, struct fuse_req,
@@ -1260,12 +1264,6 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
	in = &req->in;
	reqsize = in->h.len;

	if (task_active_pid_ns(current) != fc->pid_ns) {
		rcu_read_lock();
		in->h.pid = pid_vnr(find_pid_ns(in->h.pid, fc->pid_ns));
		rcu_read_unlock();
	}

	/* If request is too large, reply with an error and restart the read */
	if (nbytes < reqsize) {
		req->out.h.error = -EIO;
@@ -1287,7 +1285,7 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
	spin_lock(&fpq->lock);
	clear_bit(FR_LOCKED, &req->flags);
	if (!fpq->connected) {
		err = -ENODEV;
		err = (fc->aborted && fc->abort_err) ? -ECONNABORTED : -ENODEV;
		goto out_end;
	}
	if (err) {
@@ -2076,7 +2074,7 @@ static void end_polls(struct fuse_conn *fc)
 * is OK, the request will in that case be removed from the list before we touch
 * it.
 */
void fuse_abort_conn(struct fuse_conn *fc)
void fuse_abort_conn(struct fuse_conn *fc, bool is_abort)
{
	struct fuse_iqueue *fiq = &fc->iq;

@@ -2089,6 +2087,7 @@ void fuse_abort_conn(struct fuse_conn *fc)

		fc->connected = 0;
		fc->blocked = 0;
		fc->aborted = is_abort;
		fuse_set_initialized(fc);
		list_for_each_entry(fud, &fc->devices, entry) {
			struct fuse_pqueue *fpq = &fud->pq;
@@ -2151,7 +2150,7 @@ int fuse_dev_release(struct inode *inode, struct file *file)
		/* Are we the last open device? */
		if (atomic_dec_and_test(&fc->dev_count)) {
			WARN_ON(fc->iq.fasync != NULL);
			fuse_abort_conn(fc);
			fuse_abort_conn(fc, false);
		}
		fuse_dev_free(fud);
	}
Loading