Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 78725596 authored by Eric W. Biederman's avatar Eric W. Biederman
Browse files

Merge branch 'nsfs-ioctls' into HEAD

From: Andrey Vagin <avagin@openvz.org>

Each namespace has an owning user namespace and now there is not way
to discover these relationships.

Pid and user namepaces are hierarchical. There is no way to discover
parent-child relationships too.

Why we may want to know relationships between namespaces?

One use would be visualization, in order to understand the running
system.  Another would be to answer the question: what capability does
process X have to perform operations on a resource governed by namespace
Y?

One more use-case (which usually called abnormal) is checkpoint/restart.
In CRIU we are going to dump and restore nested namespaces.

There [1] was a discussion about which interface to choose to determing
relationships between namespaces.

Eric suggested to add two ioctl-s [2]:
> Grumble, Grumble.  I think this may actually a case for creating ioctls
> for these two cases.  Now that random nsfs file descriptors are bind
> mountable the original reason for using proc files is not as pressing.
>
> One ioctl for the user namespace that owns a file descriptor.
> One ioctl for the parent namespace of a namespace file descriptor.

Here is an implementaions of these ioctl-s.

$ man man7/namespaces.7
...
Since  Linux  4.X,  the  following  ioctl(2)  calls are supported for
namespace file descriptors.  The correct syntax is:

      fd = ioctl(ns_fd, ioctl_type);

where ioctl_type is one of the following:

NS_GET_USERNS
      Returns a file descriptor that refers to an owning user names‐
      pace.

NS_GET_PARENT
      Returns  a  file descriptor that refers to a parent namespace.
      This ioctl(2) can be used for pid  and  user  namespaces.  For
      user namespaces, NS_GET_PARENT and NS_GET_USERNS have the same
      meaning.

In addition to generic ioctl(2) errors, the following  specific  ones
can occur:

EINVAL NS_GET_PARENT was called for a nonhierarchical namespace.

EPERM  The  requested  namespace  is outside of the current namespace
      scope.

[1] https://lkml.org/lkml/2016/7/6/158
[2] https://lkml.org/lkml/2016/7/9/101

Changes for v2:
* don't return ENOENT for init_user_ns and init_pid_ns. There is nothing
  outside of the init namespace, so we can return EPERM in this case too.
  > The fewer special cases the easier the code is to get
  > correct, and the easier it is to read. // Eric

Changes for v3:
* rename ns->get_owner() to ns->owner(). get_* usually means that it
  grabs a reference.

Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
Cc: "Michael Kerrisk (man-pages)" <mtk.manpages@gmail.com>
Cc: "W. Trevor King" <wking@tremily.us>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Serge Hallyn <serge.hallyn@canonical.com>
parents 93f0a88b 6ad92bf6
Loading
Loading
Loading
Loading
+6 −0
Original line number Diff line number Diff line
@@ -3368,10 +3368,16 @@ static int mntns_install(struct nsproxy *nsproxy, struct ns_common *ns)
	return 0;
}

static struct user_namespace *mntns_owner(struct ns_common *ns)
{
	return to_mnt_ns(ns)->user_ns;
}

const struct proc_ns_operations mntns_operations = {
	.name		= "mnt",
	.type		= CLONE_NEWNS,
	.get		= mntns_get,
	.put		= mntns_put,
	.install	= mntns_install,
	.owner		= mntns_owner,
};
+87 −13
Original line number Diff line number Diff line
@@ -5,11 +5,16 @@
#include <linux/magic.h>
#include <linux/ktime.h>
#include <linux/seq_file.h>
#include <linux/user_namespace.h>
#include <linux/nsfs.h>

static struct vfsmount *nsfs_mnt;

static long ns_ioctl(struct file *filp, unsigned int ioctl,
			unsigned long arg);
static const struct file_operations ns_file_operations = {
	.llseek		= no_llseek,
	.unlocked_ioctl = ns_ioctl,
};

static char *ns_dname(struct dentry *dentry, char *buffer, int buflen)
@@ -44,22 +49,14 @@ static void nsfs_evict(struct inode *inode)
	ns->ops->put(ns);
}

void *ns_get_path(struct path *path, struct task_struct *task,
			const struct proc_ns_operations *ns_ops)
static void *__ns_get_path(struct path *path, struct ns_common *ns)
{
	struct vfsmount *mnt = mntget(nsfs_mnt);
	struct qstr qname = { .name = "", };
	struct dentry *dentry;
	struct inode *inode;
	struct ns_common *ns;
	unsigned long d;

again:
	ns = ns_ops->get(task);
	if (!ns) {
		mntput(mnt);
		return ERR_PTR(-ENOENT);
	}
	rcu_read_lock();
	d = atomic_long_read(&ns->stashed);
	if (!d)
@@ -68,7 +65,7 @@ void *ns_get_path(struct path *path, struct task_struct *task,
	if (!lockref_get_not_dead(&dentry->d_lockref))
		goto slow;
	rcu_read_unlock();
	ns_ops->put(ns);
	ns->ops->put(ns);
got_it:
	path->mnt = mnt;
	path->dentry = dentry;
@@ -77,7 +74,7 @@ void *ns_get_path(struct path *path, struct task_struct *task,
	rcu_read_unlock();
	inode = new_inode_pseudo(mnt->mnt_sb);
	if (!inode) {
		ns_ops->put(ns);
		ns->ops->put(ns);
		mntput(mnt);
		return ERR_PTR(-ENOMEM);
	}
@@ -95,17 +92,94 @@ void *ns_get_path(struct path *path, struct task_struct *task,
		return ERR_PTR(-ENOMEM);
	}
	d_instantiate(dentry, inode);
	dentry->d_fsdata = (void *)ns_ops;
	dentry->d_fsdata = (void *)ns->ops;
	d = atomic_long_cmpxchg(&ns->stashed, 0, (unsigned long)dentry);
	if (d) {
		d_delete(dentry);	/* make sure ->d_prune() does nothing */
		dput(dentry);
		mntput(mnt);
		cpu_relax();
		goto again;
		return ERR_PTR(-EAGAIN);
	}
	goto got_it;
}

void *ns_get_path(struct path *path, struct task_struct *task,
			const struct proc_ns_operations *ns_ops)
{
	struct ns_common *ns;
	void *ret;

again:
	ns = ns_ops->get(task);
	if (!ns)
		return ERR_PTR(-ENOENT);

	ret = __ns_get_path(path, ns);
	if (IS_ERR(ret) && PTR_ERR(ret) == -EAGAIN)
		goto again;
	return ret;
}

static int open_related_ns(struct ns_common *ns,
		   struct ns_common *(*get_ns)(struct ns_common *ns))
{
	struct path path = {};
	struct file *f;
	void *err;
	int fd;

	fd = get_unused_fd_flags(O_CLOEXEC);
	if (fd < 0)
		return fd;

	while (1) {
		struct ns_common *relative;

		relative = get_ns(ns);
		if (IS_ERR(relative)) {
			put_unused_fd(fd);
			return PTR_ERR(relative);
		}

		err = __ns_get_path(&path, relative);
		if (IS_ERR(err) && PTR_ERR(err) == -EAGAIN)
			continue;
		break;
	}
	if (IS_ERR(err)) {
		put_unused_fd(fd);
		return PTR_ERR(err);
	}

	f = dentry_open(&path, O_RDONLY, current_cred());
	path_put(&path);
	if (IS_ERR(f)) {
		put_unused_fd(fd);
		fd = PTR_ERR(f);
	} else
		fd_install(fd, f);

	return fd;
}

static long ns_ioctl(struct file *filp, unsigned int ioctl,
			unsigned long arg)
{
	struct ns_common *ns = get_proc_ns(file_inode(filp));

	switch (ioctl) {
	case NS_GET_USERNS:
		return open_related_ns(ns, ns_get_owner);
	case NS_GET_PARENT:
		if (!ns->ops->get_parent)
			return -EINVAL;
		return open_related_ns(ns, ns->ops->get_parent);
	default:
		return -ENOTTY;
	}
}

int ns_get_name(char *buf, size_t size, struct task_struct *task,
			const struct proc_ns_operations *ns_ops)
{
+2 −0
Original line number Diff line number Diff line
@@ -18,6 +18,8 @@ struct proc_ns_operations {
	struct ns_common *(*get)(struct task_struct *task);
	void (*put)(struct ns_common *ns);
	int (*install)(struct nsproxy *nsproxy, struct ns_common *ns);
	struct user_namespace *(*owner)(struct ns_common *ns);
	struct ns_common *(*get_parent)(struct ns_common *ns);
};

extern const struct proc_ns_operations netns_operations;
+7 −0
Original line number Diff line number Diff line
@@ -106,6 +106,8 @@ extern ssize_t proc_setgroups_write(struct file *, const char __user *, size_t,
extern int proc_setgroups_show(struct seq_file *m, void *v);
extern bool userns_may_setgroups(const struct user_namespace *ns);
extern bool current_in_userns(const struct user_namespace *target_ns);

struct ns_common *ns_get_owner(struct ns_common *ns);
#else

static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
@@ -139,6 +141,11 @@ static inline bool current_in_userns(const struct user_namespace *target_ns)
{
	return true;
}

static inline struct ns_common *ns_get_owner(struct ns_common *ns)
{
	return ERR_PTR(-EPERM);
}
#endif

#endif /* _LINUX_USER_H */
+13 −0
Original line number Diff line number Diff line
#ifndef __LINUX_NSFS_H
#define __LINUX_NSFS_H

#include <linux/ioctl.h>

#define NSIO	0xb7

/* Returns a file descriptor that refers to an owning user namespace */
#define NS_GET_USERNS	_IO(NSIO, 0x1)
/* Returns a file descriptor that refers to a parent namespace */
#define NS_GET_PARENT	_IO(NSIO, 0x2)

#endif /* __LINUX_NSFS_H */
Loading