Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 8b0fdf63 authored by Linus Torvalds
Browse files
Pull mqueue/bpf vfs cleanups from Al Viro:
 "mqueue and bpf go through rather painful and similar contortions to
  create objects in their dentry trees. Provide a primitive for doing
  that without abusing ->mknod(), switch bpf and mqueue to it.

  Another mqueue-related thing that has ended up in that branch is
  on-demand creation of internal mount (based upon the work of Giuseppe
  Scrivano)"

* 'work.mqueue' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  mqueue: switch to on-demand creation of internal mount
  tidy do_mq_open() up a bit
  mqueue: clean prepare_open() up
  do_mq_open(): move all work prior to dentry_open() into a helper
  mqueue: fold mq_attr_ok() into mqueue_get_inode()
  move dentry_open() calls up into do_mq_open()
  mqueue: switch to vfs_mkobj(), quit abusing ->d_fsdata
  bpf_obj_do_pin(): switch to vfs_mkobj(), quit abusing ->mknod()
  new primitive: vfs_mkobj()
parents 168fe32a 36735a6a
Loading
Loading
Loading
Loading
+21 −0
Original line number Diff line number Diff line
@@ -2895,6 +2895,27 @@ int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
}
EXPORT_SYMBOL(vfs_create);

/*
 * vfs_mkobj - create an object under dentry's parent through a callback
 * @dentry: dentry of the object to create; its parent directory inode is
 *          taken from dentry->d_parent
 * @mode:   requested mode; masked with S_IALLUGO and then or-ed with
 *          S_IFREG before it is handed on
 * @f:      callback that performs the actual creation
 * @arg:    opaque pointer passed through to @f untouched
 *
 * Runs may_create() and security_inode_create() against the parent
 * directory, then calls @f.  fsnotify_create() is issued only when @f
 * reports success.
 *
 * Returns 0 on success, otherwise the first non-zero value produced by
 * may_create(), security_inode_create() or @f.
 */
int vfs_mkobj(struct dentry *dentry, umode_t mode,
		int (*f)(struct dentry *, umode_t, void *),
		void *arg)
{
	struct inode *parent = dentry->d_parent->d_inode;
	int rc;

	rc = may_create(parent, dentry);
	if (rc)
		return rc;

	/* Strip everything outside S_IALLUGO, then mark it S_IFREG. */
	mode = (mode & S_IALLUGO) | S_IFREG;

	rc = security_inode_create(parent, dentry, mode);
	if (rc)
		return rc;

	rc = f(dentry, mode, arg);
	if (rc)
		return rc;

	/* Only announce the new object once the callback has succeeded. */
	fsnotify_create(parent, dentry);
	return 0;
}
EXPORT_SYMBOL(vfs_mkobj);

bool may_open_dev(const struct path *path)
{
	return !(path->mnt->mnt_flags & MNT_NODEV) &&
+4 −0
Original line number Diff line number Diff line
@@ -1608,6 +1608,10 @@ extern int vfs_whiteout(struct inode *, struct dentry *);
extern struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode,
				  int open_flag);

int vfs_mkobj(struct dentry *, umode_t,
		int (*f)(struct dentry *, umode_t, void *),
		void *);

/*
 * VFS file helper functions.
 */
+111 −130
Original line number Diff line number Diff line
@@ -270,13 +270,30 @@ static struct inode *mqueue_get_inode(struct super_block *sb,
		 * that means the min(mq_maxmsg, max_priorities) * struct
		 * posix_msg_tree_node.
		 */

		ret = -EINVAL;
		if (info->attr.mq_maxmsg <= 0 || info->attr.mq_msgsize <= 0)
			goto out_inode;
		if (capable(CAP_SYS_RESOURCE)) {
			if (info->attr.mq_maxmsg > HARD_MSGMAX ||
			    info->attr.mq_msgsize > HARD_MSGSIZEMAX)
				goto out_inode;
		} else {
			if (info->attr.mq_maxmsg > ipc_ns->mq_msg_max ||
					info->attr.mq_msgsize > ipc_ns->mq_msgsize_max)
				goto out_inode;
		}
		ret = -EOVERFLOW;
		/* check for overflow */
		if (info->attr.mq_msgsize > ULONG_MAX/info->attr.mq_maxmsg)
			goto out_inode;
		mq_treesize = info->attr.mq_maxmsg * sizeof(struct msg_msg) +
			min_t(unsigned int, info->attr.mq_maxmsg, MQ_PRIO_MAX) *
			sizeof(struct posix_msg_tree_node);

		mq_bytes = mq_treesize + (info->attr.mq_maxmsg *
					  info->attr.mq_msgsize);

		mq_bytes = info->attr.mq_maxmsg * info->attr.mq_msgsize;
		if (mq_bytes + mq_treesize < mq_bytes)
			goto out_inode;
		mq_bytes += mq_treesize;
		spin_lock(&mq_lock);
		if (u->mq_bytes + mq_bytes < u->mq_bytes ||
		    u->mq_bytes + mq_bytes > rlimit(RLIMIT_MSGQUEUE)) {
@@ -308,8 +325,9 @@ static struct inode *mqueue_get_inode(struct super_block *sb,
static int mqueue_fill_super(struct super_block *sb, void *data, int silent)
{
	struct inode *inode;
	struct ipc_namespace *ns = sb->s_fs_info;
	struct ipc_namespace *ns = data;

	sb->s_fs_info = ns;
	sb->s_iflags |= SB_I_NOEXEC | SB_I_NODEV;
	sb->s_blocksize = PAGE_SIZE;
	sb->s_blocksize_bits = PAGE_SHIFT;
@@ -326,18 +344,44 @@ static int mqueue_fill_super(struct super_block *sb, void *data, int silent)
	return 0;
}

static struct file_system_type mqueue_fs_type;
/*
 * Return the internal mqueue mount of the caller's ipc namespace,
 * creating it with kern_mount_data() on first use.
 *
 * The returned mount is pinned only by the reference held in ->mq_mnt;
 * it will live until the ipcns dies, so the caller does not need to
 * drop it.  On failure the ERR_PTR from kern_mount_data() is returned
 * and ->mq_mnt is left unset.
 */
static struct vfsmount *mq_internal_mount(void)
{
	struct ipc_namespace *ns = current->nsproxy->ipc_ns;
	struct vfsmount *mnt = ns->mq_mnt;

	/* Already set up: nothing to do. */
	if (mnt)
		return mnt;

	/* Build a candidate mount before taking mq_lock. */
	mnt = kern_mount_data(&mqueue_fs_type, ns);

	spin_lock(&mq_lock);
	if (likely(!ns->mq_mnt)) {
		/* Nobody installed one meanwhile; publish ours if it is valid. */
		if (!IS_ERR(mnt))
			ns->mq_mnt = mnt;
		spin_unlock(&mq_lock);
		return mnt;
	}
	spin_unlock(&mq_lock);

	/* ->mq_mnt was set in the meantime: discard our candidate. */
	if (!IS_ERR(mnt))
		kern_unmount(mnt);
	return ns->mq_mnt;
}

static struct dentry *mqueue_mount(struct file_system_type *fs_type,
			 int flags, const char *dev_name,
			 void *data)
{
	struct ipc_namespace *ns;
	if (flags & SB_KERNMOUNT) {
		ns = data;
		data = NULL;
	} else {
		ns = current->nsproxy->ipc_ns;
	}
	return mount_ns(fs_type, flags, data, ns, ns->user_ns, mqueue_fill_super);
	struct vfsmount *m;
	if (flags & SB_KERNMOUNT)
		return mount_nodev(fs_type, flags, data, mqueue_fill_super);
	m = mq_internal_mount();
	if (IS_ERR(m))
		return ERR_CAST(m);
	atomic_inc(&m->mnt_sb->s_active);
	down_write(&m->mnt_sb->s_umount);
	return dget(m->mnt_root);
}

static void init_once(void *foo)
@@ -416,11 +460,11 @@ static void mqueue_evict_inode(struct inode *inode)
		put_ipc_ns(ipc_ns);
}

static int mqueue_create(struct inode *dir, struct dentry *dentry,
				umode_t mode, bool excl)
static int mqueue_create_attr(struct dentry *dentry, umode_t mode, void *arg)
{
	struct inode *dir = dentry->d_parent->d_inode;
	struct inode *inode;
	struct mq_attr *attr = dentry->d_fsdata;
	struct mq_attr *attr = arg;
	int error;
	struct ipc_namespace *ipc_ns;

@@ -461,6 +505,12 @@ static int mqueue_create(struct inode *dir, struct dentry *dentry,
	return error;
}

/*
 * Create a queue without caller-supplied attributes: forwards to
 * mqueue_create_attr() with a NULL attribute pointer.  @dir and @excl
 * are not used here.
 */
static int mqueue_create(struct inode *dir, struct dentry *dentry,
				umode_t mode, bool excl)
{
	void *no_attr = NULL;

	return mqueue_create_attr(dentry, mode, no_attr);
}

static int mqueue_unlink(struct inode *dir, struct dentry *dentry)
{
	struct inode *inode = d_inode(dentry);
@@ -691,96 +741,46 @@ static void remove_notification(struct mqueue_inode_info *info)
	info->notify_user_ns = NULL;
}

static int mq_attr_ok(struct ipc_namespace *ipc_ns, struct mq_attr *attr)
{
	int mq_treesize;
	unsigned long total_size;

	if (attr->mq_maxmsg <= 0 || attr->mq_msgsize <= 0)
		return -EINVAL;
	if (capable(CAP_SYS_RESOURCE)) {
		if (attr->mq_maxmsg > HARD_MSGMAX ||
		    attr->mq_msgsize > HARD_MSGSIZEMAX)
			return -EINVAL;
	} else {
		if (attr->mq_maxmsg > ipc_ns->mq_msg_max ||
				attr->mq_msgsize > ipc_ns->mq_msgsize_max)
			return -EINVAL;
	}
	/* check for overflow */
	if (attr->mq_msgsize > ULONG_MAX/attr->mq_maxmsg)
		return -EOVERFLOW;
	mq_treesize = attr->mq_maxmsg * sizeof(struct msg_msg) +
		min_t(unsigned int, attr->mq_maxmsg, MQ_PRIO_MAX) *
		sizeof(struct posix_msg_tree_node);
	total_size = attr->mq_maxmsg * attr->mq_msgsize;
	if (total_size + mq_treesize < total_size)
		return -EOVERFLOW;
	return 0;
}

/*
 * Invoked when creating a new queue via sys_mq_open
 */
static struct file *do_create(struct ipc_namespace *ipc_ns, struct inode *dir,
			struct path *path, int oflag, umode_t mode,
static int prepare_open(struct dentry *dentry, int oflag, int ro,
			umode_t mode, struct filename *name,
			struct mq_attr *attr)
{
	const struct cred *cred = current_cred();
	int ret;

	if (attr) {
		ret = mq_attr_ok(ipc_ns, attr);
		if (ret)
			return ERR_PTR(ret);
		/* store for use during create */
		path->dentry->d_fsdata = attr;
	} else {
		struct mq_attr def_attr;

		def_attr.mq_maxmsg = min(ipc_ns->mq_msg_max,
					 ipc_ns->mq_msg_default);
		def_attr.mq_msgsize = min(ipc_ns->mq_msgsize_max,
					  ipc_ns->mq_msgsize_default);
		ret = mq_attr_ok(ipc_ns, &def_attr);
		if (ret)
			return ERR_PTR(ret);
	}

	mode &= ~current_umask();
	ret = vfs_create(dir, path->dentry, mode, true);
	path->dentry->d_fsdata = NULL;
	if (ret)
		return ERR_PTR(ret);
	return dentry_open(path, oflag, cred);
}

/* Opens existing queue */
static struct file *do_open(struct path *path, int oflag)
{
	static const int oflag2acc[O_ACCMODE] = { MAY_READ, MAY_WRITE,
						  MAY_READ | MAY_WRITE };
	int acc;

	if (d_really_is_negative(dentry)) {
		if (!(oflag & O_CREAT))
			return -ENOENT;
		if (ro)
			return ro;
		audit_inode_parent_hidden(name, dentry->d_parent);
		return vfs_mkobj(dentry, mode & ~current_umask(),
				  mqueue_create_attr, attr);
	}
	/* it already existed */
	audit_inode(name, dentry, 0);
	if ((oflag & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL))
		return -EEXIST;
	if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY))
		return ERR_PTR(-EINVAL);
		return -EINVAL;
	acc = oflag2acc[oflag & O_ACCMODE];
	if (inode_permission(d_inode(path->dentry), acc))
		return ERR_PTR(-EACCES);
	return dentry_open(path, oflag, current_cred());
	return inode_permission(d_inode(dentry), acc);
}

static int do_mq_open(const char __user *u_name, int oflag, umode_t mode,
		      struct mq_attr *attr)
{
	struct path path;
	struct file *filp;
	struct vfsmount *mnt = mq_internal_mount();
	struct dentry *root;
	struct filename *name;
	struct path path;
	int fd, error;
	struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns;
	struct vfsmount *mnt = ipc_ns->mq_mnt;
	struct dentry *root = mnt->mnt_root;
	int ro;

	if (IS_ERR(mnt))
		return PTR_ERR(mnt);

	audit_mq_open(oflag, mode, attr);

	if (IS_ERR(name = getname(u_name)))
@@ -791,7 +791,7 @@ static int do_mq_open(const char __user *u_name, int oflag, umode_t mode,
		goto out_putname;

	ro = mnt_want_write(mnt);	/* we'll drop it in any case */
	error = 0;
	root = mnt->mnt_root;
	inode_lock(d_inode(root));
	path.dentry = lookup_one_len(name->name, root, strlen(name->name));
	if (IS_ERR(path.dentry)) {
@@ -799,38 +799,14 @@ static int do_mq_open(const char __user *u_name, int oflag, umode_t mode,
		goto out_putfd;
	}
	path.mnt = mntget(mnt);

	if (oflag & O_CREAT) {
		if (d_really_is_positive(path.dentry)) {	/* entry already exists */
			audit_inode(name, path.dentry, 0);
			if (oflag & O_EXCL) {
				error = -EEXIST;
				goto out;
			}
			filp = do_open(&path, oflag);
		} else {
			if (ro) {
				error = ro;
				goto out;
			}
			audit_inode_parent_hidden(name, root);
			filp = do_create(ipc_ns, d_inode(root), &path,
					 oflag, mode, attr);
		}
	} else {
		if (d_really_is_negative(path.dentry)) {
			error = -ENOENT;
			goto out;
		}
		audit_inode(name, path.dentry, 0);
		filp = do_open(&path, oflag);
	}

	if (!IS_ERR(filp))
		fd_install(fd, filp);
	error = prepare_open(path.dentry, oflag, ro, mode, name, attr);
	if (!error) {
		struct file *file = dentry_open(&path, oflag, current_cred());
		if (!IS_ERR(file))
			fd_install(fd, file);
		else
		error = PTR_ERR(filp);
out:
			error = PTR_ERR(file);
	}
	path_put(&path);
out_putfd:
	if (error) {
@@ -864,6 +840,9 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name)
	struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns;
	struct vfsmount *mnt = ipc_ns->mq_mnt;

	if (!mnt)
		return -ENOENT;

	name = getname(u_name);
	if (IS_ERR(name))
		return PTR_ERR(name);
@@ -1590,28 +1569,26 @@ int mq_init_ns(struct ipc_namespace *ns)
	ns->mq_msgsize_max   = DFLT_MSGSIZEMAX;
	ns->mq_msg_default   = DFLT_MSG;
	ns->mq_msgsize_default  = DFLT_MSGSIZE;

	ns->mq_mnt = kern_mount_data(&mqueue_fs_type, ns);
	if (IS_ERR(ns->mq_mnt)) {
		int err = PTR_ERR(ns->mq_mnt);
	ns->mq_mnt = NULL;
		return err;
	}

	return 0;
}

void mq_clear_sbinfo(struct ipc_namespace *ns)
{
	if (ns->mq_mnt)
		ns->mq_mnt->mnt_sb->s_fs_info = NULL;
}

/*
 * Release @ns's internal mqueue mount via kern_unmount().
 * Does nothing when ->mq_mnt is NULL.
 */
void mq_put_mnt(struct ipc_namespace *ns)
{
	struct vfsmount *mnt = ns->mq_mnt;

	if (!mnt)
		return;
	kern_unmount(mnt);
}

static int __init init_mqueue_fs(void)
{
	struct vfsmount *m;
	int error;

	mqueue_inode_cachep = kmem_cache_create("mqueue_inode_cache",
@@ -1633,6 +1610,10 @@ static int __init init_mqueue_fs(void)
	if (error)
		goto out_filesystem;

	m = kern_mount_data(&mqueue_fs_type, &init_ipc_ns);
	if (IS_ERR(m))
		goto out_filesystem;
	init_ipc_ns.mq_mnt = m;
	return 0;

out_filesystem:
+22 −28
Original line number Diff line number Diff line
@@ -150,39 +150,29 @@ static int bpf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
	return 0;
}

static int bpf_mkobj_ops(struct inode *dir, struct dentry *dentry,
			 umode_t mode, const struct inode_operations *iops)
static int bpf_mkobj_ops(struct dentry *dentry, umode_t mode, void *raw,
			 const struct inode_operations *iops)
{
	struct inode *inode;

	inode = bpf_get_inode(dir->i_sb, dir, mode | S_IFREG);
	struct inode *dir = dentry->d_parent->d_inode;
	struct inode *inode = bpf_get_inode(dir->i_sb, dir, mode);
	if (IS_ERR(inode))
		return PTR_ERR(inode);

	inode->i_op = iops;
	inode->i_private = dentry->d_fsdata;
	inode->i_private = raw;

	bpf_dentry_finalize(dentry, inode, dir);
	return 0;
}

static int bpf_mkobj(struct inode *dir, struct dentry *dentry, umode_t mode,
		     dev_t devt)
static int bpf_mkprog(struct dentry *dentry, umode_t mode, void *arg)
{
	enum bpf_type type = MINOR(devt);

	if (MAJOR(devt) != UNNAMED_MAJOR || !S_ISREG(mode) ||
	    dentry->d_fsdata == NULL)
		return -EPERM;

	switch (type) {
	case BPF_TYPE_PROG:
		return bpf_mkobj_ops(dir, dentry, mode, &bpf_prog_iops);
	case BPF_TYPE_MAP:
		return bpf_mkobj_ops(dir, dentry, mode, &bpf_map_iops);
	default:
		return -EPERM;
	return bpf_mkobj_ops(dentry, mode, arg, &bpf_prog_iops);
}

/*
 * Creation callback handed to vfs_mkobj() for BPF_TYPE_MAP: delegates to
 * bpf_mkobj_ops() with bpf_map_iops as the inode operations, passing
 * @arg through unchanged.
 */
static int bpf_mkmap(struct dentry *dentry, umode_t mode, void *arg)
{
	const struct inode_operations *ops = &bpf_map_iops;

	return bpf_mkobj_ops(dentry, mode, arg, ops);
}

static struct dentry *
@@ -218,7 +208,6 @@ static int bpf_symlink(struct inode *dir, struct dentry *dentry,

static const struct inode_operations bpf_dir_iops = {
	.lookup		= bpf_lookup,
	.mknod		= bpf_mkobj,
	.mkdir		= bpf_mkdir,
	.symlink	= bpf_symlink,
	.rmdir		= simple_rmdir,
@@ -234,7 +223,6 @@ static int bpf_obj_do_pin(const struct filename *pathname, void *raw,
	struct inode *dir;
	struct path path;
	umode_t mode;
	dev_t devt;
	int ret;

	dentry = kern_path_create(AT_FDCWD, pathname->name, &path, 0);
@@ -242,9 +230,8 @@ static int bpf_obj_do_pin(const struct filename *pathname, void *raw,
		return PTR_ERR(dentry);

	mode = S_IFREG | ((S_IRUSR | S_IWUSR) & ~current_umask());
	devt = MKDEV(UNNAMED_MAJOR, type);

	ret = security_path_mknod(&path, dentry, mode, devt);
	ret = security_path_mknod(&path, dentry, mode, 0);
	if (ret)
		goto out;

@@ -254,9 +241,16 @@ static int bpf_obj_do_pin(const struct filename *pathname, void *raw,
		goto out;
	}

	dentry->d_fsdata = raw;
	ret = vfs_mknod(dir, dentry, mode, devt);
	dentry->d_fsdata = NULL;
	switch (type) {
	case BPF_TYPE_PROG:
		ret = vfs_mkobj(dentry, mode, bpf_mkprog, raw);
		break;
	case BPF_TYPE_MAP:
		ret = vfs_mkobj(dentry, mode, bpf_mkmap, raw);
		break;
	default:
		ret = -EPERM;
	}
out:
	done_path_create(&path, dentry);
	return ret;