Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit e149ed2b authored by Al Viro
Browse files

take the targets of /proc/*/ns/* symlinks to separate fs



New pseudo-filesystem: nsfs.  Targets of /proc/*/ns/* live there now.
It's not mountable (not even registered, so it's not in /proc/filesystems,
etc.).  Files on it *are* bindable - we explicitly permit that in do_loopback().

This stuff lives in fs/nsfs.c now; proc_ns_fget() moved there as well.
get_proc_ns() is a macro now (it's simply returning ->i_private; would
have been an inline, if not for header ordering headache).
proc_ns_inode() is an ex-parrot.  The interface used in procfs is
ns_get_path(path, task, ops) and ns_get_name(buf, size, task, ops).

Dentries and inodes are never hashed; a non-counting reference to dentry
is stashed in ns_common (removed by ->d_prune()) and reused by ns_get_path()
if present.  See ns_get_path()/ns_prune_dentry/nsfs_evict() for details
of that mechanism.

As the result, proc_ns_follow_link() has stopped poking in nd->path.mnt;
it does nd_jump_link() on a consistent <vfsmount,dentry> pair it gets
from ns_get_path().

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
parent f77c8014
Loading
Loading
Loading
Loading
+1 −1
Original line number Original line Diff line number Diff line
@@ -11,7 +11,7 @@ obj-y := open.o read_write.o file_table.o super.o \
		attr.o bad_inode.o file.o filesystems.o namespace.o \
		attr.o bad_inode.o file.o filesystems.o namespace.o \
		seq_file.o xattr.o libfs.o fs-writeback.o \
		seq_file.o xattr.o libfs.o fs-writeback.o \
		pnode.o splice.o sync.o utimes.o \
		pnode.o splice.o sync.o utimes.o \
		stack.o fs_struct.o statfs.o fs_pin.o
		stack.o fs_struct.o statfs.o fs_pin.o nsfs.o


ifeq ($(CONFIG_BLOCK),y)
ifeq ($(CONFIG_BLOCK),y)
obj-y +=	buffer.o block_dev.o direct-io.o mpage.o
obj-y +=	buffer.o block_dev.o direct-io.o mpage.o
+5 −0
Original line number Original line Diff line number Diff line
@@ -147,3 +147,8 @@ extern const struct file_operations pipefifo_fops;
 */
 */
extern void sb_pin_kill(struct super_block *sb);
extern void sb_pin_kill(struct super_block *sb);
extern void mnt_pin_kill(struct mount *m);
extern void mnt_pin_kill(struct mount *m);

/*
 * fs/nsfs.c
 *
 * Dentry operations of nsfs.  The object is defined const in fs/nsfs.c,
 * so the declaration must be const-qualified as well; it is only ever
 * compared by address or passed as a const pointer.
 */
extern const struct dentry_operations ns_dentry_operations;
+6 −3
Original line number Original line Diff line number Diff line
@@ -1569,8 +1569,8 @@ SYSCALL_DEFINE1(oldumount, char __user *, name)
static bool is_mnt_ns_file(struct dentry *dentry)
static bool is_mnt_ns_file(struct dentry *dentry)
{
{
	/* Is this a proxy for a mount namespace? */
	/* Is this a proxy for a mount namespace? */
	struct inode *inode = dentry->d_inode;
	return dentry->d_op == &ns_dentry_operations &&
	return proc_ns_inode(inode) && dentry->d_fsdata == &mntns_operations;
	       dentry->d_fsdata == &mntns_operations;
}
}


struct mnt_namespace *to_mnt_ns(struct ns_common *ns)
struct mnt_namespace *to_mnt_ns(struct ns_common *ns)
@@ -2016,7 +2016,10 @@ static int do_loopback(struct path *path, const char *old_name,
	if (IS_MNT_UNBINDABLE(old))
	if (IS_MNT_UNBINDABLE(old))
		goto out2;
		goto out2;


	if (!check_mnt(parent) || !check_mnt(old))
	if (!check_mnt(parent))
		goto out2;

	if (!check_mnt(old) && old_path.dentry->d_op != &ns_dentry_operations)
		goto out2;
		goto out2;


	if (!recurse && has_locked_children(old, old_path.dentry))
	if (!recurse && has_locked_children(old, old_path.dentry))

fs/nsfs.c

0 → 100644
+161 −0
Original line number Original line Diff line number Diff line
#include <linux/mount.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/proc_ns.h>
#include <linux/magic.h>
#include <linux/ktime.h>

/*
 * Kernel-internal mount of nsfs.  Assigned once by nsfs_init(); ns_get_path()
 * takes a reference to it for every path it hands out.
 */
static struct vfsmount *nsfs_mnt;

/*
 * File operations installed on every nsfs inode by ns_get_path().
 * proc_ns_fget() compares ->f_op against this object to recognise
 * namespace files.
 */
static const struct file_operations ns_file_operations = {
	.llseek		= no_llseek,
};

/*
 * ->d_dname() for nsfs dentries.
 *
 * Formats the name as "<namespace type>:[<inode number>]" into @buffer,
 * taking the type name from the proc_ns_operations kept in ->d_fsdata.
 */
static char *ns_dname(struct dentry *dentry, char *buffer, int buflen)
{
	const struct proc_ns_operations *ops = dentry->d_fsdata;
	unsigned long ino = dentry->d_inode->i_ino;

	return dynamic_dname(dentry, buffer, buflen, "%s:[%lu]", ops->name, ino);
}

/*
 * ->d_prune() for nsfs dentries.
 *
 * If the dentry still has an inode attached, reset the dentry pointer
 * stashed in the namespace that inode refers to.  Negative dentries are
 * left alone.
 */
static void ns_prune_dentry(struct dentry *dentry)
{
	struct inode *inode = dentry->d_inode;
	struct ns_common *ns;

	if (!inode)
		return;

	ns = inode->i_private;
	atomic_long_set(&ns->stashed, 0);
}

/*
 * Dentry operations shared by all nsfs dentries.  Non-static: code outside
 * this file identifies nsfs dentries by comparing ->d_op with this object.
 */
const struct dentry_operations ns_dentry_operations = {
	.d_delete	= always_delete_dentry,
	.d_dname	= ns_dname,
	.d_prune	= ns_prune_dentry,
};

/*
 * ->evict_inode() for nsfs: tear the inode down, then drop the namespace
 * reference that was stored in ->i_private when the inode was set up.
 */
static void nsfs_evict(struct inode *inode)
{
	struct ns_common *owner = inode->i_private;

	clear_inode(inode);
	owner->ops->put(owner);
}

/*
 * ns_get_path - get a <vfsmount, dentry> pair on nsfs for a task's namespace
 * @path:   filled in on success; the caller then owns one reference to
 *          path->mnt and one to path->dentry
 * @task:   task handed to ns_ops->get() to find the namespace
 * @ns_ops: operations of the namespace type wanted
 *
 * Returns NULL on success, ERR_PTR(-ENOENT) when ns_ops->get() finds no
 * namespace, and ERR_PTR(-ENOMEM) when the inode or dentry cannot be
 * allocated.
 *
 * A non-counting pointer to the dentry is kept in ns->stashed.  The fast
 * path reuses it if it can still be grabbed; otherwise a new inode/dentry
 * pair is built and published with cmpxchg().  Losing the cmpxchg() race
 * throws the new pair away and retries from the top.
 */
void *ns_get_path(struct path *path, struct task_struct *task,
			const struct proc_ns_operations *ns_ops)
{
	struct vfsmount *mnt = mntget(nsfs_mnt);
	struct qstr qname = { .name = "", };
	struct dentry *dentry;
	struct inode *inode;
	struct ns_common *ns;
	unsigned long d;

again:
	ns = ns_ops->get(task);
	if (!ns) {
		mntput(mnt);
		return ERR_PTR(-ENOENT);
	}
	rcu_read_lock();
	d = atomic_long_read(&ns->stashed);
	if (!d)
		goto slow;
	dentry = (struct dentry *)d;
	/* the stashed pointer holds no reference; the dentry may be dying */
	if (!lockref_get_not_dead(&dentry->d_lockref))
		goto slow;
	rcu_read_unlock();
	/* the reused dentry's inode already pins the namespace; drop ours */
	ns_ops->put(ns);
got_it:
	path->mnt = mnt;
	path->dentry = dentry;
	return NULL;
slow:
	rcu_read_unlock();
	inode = new_inode_pseudo(mnt->mnt_sb);
	if (!inode) {
		ns_ops->put(ns);
		mntput(mnt);
		return ERR_PTR(-ENOMEM);
	}
	inode->i_ino = ns->inum;
	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
	inode->i_flags |= S_IMMUTABLE;
	inode->i_mode = S_IFREG | S_IRUGO;
	inode->i_fop = &ns_file_operations;
	/* from here on the namespace reference belongs to the inode */
	inode->i_private = ns;

	dentry = d_alloc_pseudo(mnt->mnt_sb, &qname);
	if (!dentry) {
		/* no ns_ops->put() here: nsfs_evict() drops it on final iput() */
		iput(inode);
		mntput(mnt);
		return ERR_PTR(-ENOMEM);
	}
	d_instantiate(dentry, inode);
	/*
	 * The fast path above looks at the stashed dentry with nothing but
	 * rcu_read_lock() held, so its freeing must be RCU-delayed.  This
	 * dentry is never hashed, hence the flag has to be set by hand;
	 * without it the dentry is freed immediately and a concurrent
	 * lockref_get_not_dead() would touch freed memory.
	 */
	dentry->d_flags |= DCACHE_RCUACCESS;
	dentry->d_fsdata = (void *)ns_ops;
	d = atomic_long_cmpxchg(&ns->stashed, 0, (unsigned long)dentry);
	if (d) {
		/* somebody else published a dentry first; discard ours */
		d_delete(dentry);	/* make sure ->d_prune() does nothing */
		dput(dentry);
		cpu_relax();
		goto again;
	}
	goto got_it;
}

/*
 * ns_get_name - format the "<type>:[<inum>]" name of a task's namespace
 * @buf:    destination buffer
 * @size:   size of @buf, passed straight to snprintf()
 * @task:   task handed to ns_ops->get()
 * @ns_ops: operations of the namespace type wanted
 *
 * Returns the snprintf() result, or -ENOENT if ns_ops->get() finds no
 * namespace.  The namespace reference is dropped before returning.
 */
int ns_get_name(char *buf, size_t size, struct task_struct *task,
			const struct proc_ns_operations *ns_ops)
{
	struct ns_common *ns = ns_ops->get(task);
	int len;

	if (!ns)
		return -ENOENT;

	len = snprintf(buf, size, "%s:[%u]", ns_ops->name, ns->inum);
	ns_ops->put(ns);
	return len;
}

/*
 * proc_ns_fget - look up a namespace file by descriptor
 * @fd: file descriptor to resolve with fget()
 *
 * Returns the referenced file if its ->f_op is ns_file_operations,
 * ERR_PTR(-EBADF) if @fd does not resolve to a file, and ERR_PTR(-EINVAL)
 * (after dropping the reference) if the file is not a namespace file.
 */
struct file *proc_ns_fget(int fd)
{
	struct file *f = fget(fd);

	if (!f)
		return ERR_PTR(-EBADF);

	if (f->f_op == &ns_file_operations)
		return f;

	fput(f);
	return ERR_PTR(-EINVAL);
}

/* Superblock operations of nsfs; eviction releases the namespace reference. */
static const struct super_operations nsfs_ops = {
	.evict_inode	= nsfs_evict,
	.statfs		= simple_statfs,
};
/*
 * ->mount() for nsfs: set up a pseudo filesystem named "nsfs:" that uses
 * nsfs_ops, ns_dentry_operations and NSFS_MAGIC.  The flags, dev_name and
 * data arguments are not used.
 */
static struct dentry *nsfs_mount(struct file_system_type *fs_type,
			int flags, const char *dev_name, void *data)
{
	return mount_pseudo(fs_type,
			    "nsfs:",
			    &nsfs_ops,
			    &ns_dentry_operations,
			    NSFS_MAGIC);
}
/* Filesystem type of nsfs; only ever mounted internally by nsfs_init(). */
static struct file_system_type nsfs = {
	.kill_sb	= kill_anon_super,
	.mount		= nsfs_mount,
	.name		= "nsfs",
};

/*
 * nsfs_init - create the kernel-internal nsfs mount at boot
 *
 * Stores the mount in nsfs_mnt and panics if kern_mount() fails.  Then
 * clears MS_NOUSER on the superblock (presumably so that files on nsfs
 * can be bind-mounted; confirm against do_loopback()).
 */
void __init nsfs_init(void)
{
	struct vfsmount *mnt = kern_mount(&nsfs);

	nsfs_mnt = mnt;
	if (IS_ERR(mnt))
		panic("can't set nsfs up\n");

	mnt->mnt_sb->s_flags &= ~MS_NOUSER;
}
+0 −5
Original line number Original line Diff line number Diff line
@@ -32,7 +32,6 @@ static void proc_evict_inode(struct inode *inode)
{
{
	struct proc_dir_entry *de;
	struct proc_dir_entry *de;
	struct ctl_table_header *head;
	struct ctl_table_header *head;
	struct ns_common *ns;


	truncate_inode_pages_final(&inode->i_data);
	truncate_inode_pages_final(&inode->i_data);
	clear_inode(inode);
	clear_inode(inode);
@@ -49,10 +48,6 @@ static void proc_evict_inode(struct inode *inode)
		RCU_INIT_POINTER(PROC_I(inode)->sysctl, NULL);
		RCU_INIT_POINTER(PROC_I(inode)->sysctl, NULL);
		sysctl_head_put(head);
		sysctl_head_put(head);
	}
	}
	/* Release any associated namespace */
	ns = PROC_I(inode)->ns.ns;
	if (ns && ns->ops)
		ns->ops->put(ns);
}
}


static struct kmem_cache * proc_inode_cachep;
static struct kmem_cache * proc_inode_cachep;
Loading