Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 0a01f2cc authored by Eric W. Biederman's avatar Eric W. Biederman
Browse files

pidns: Make the pidns proc mount/umount logic obvious.



Track the number of pids in the proc hash table.  When the number of
pids goes to 0 schedule work to unmount the kernel mount of proc.

Move the mount of proc into alloc_pid when we allocate the pid for
init.

Remove the surprising calls of pid_ns_release proc in fork and
proc_flush_task.  Those code paths really shouldn't know about proc
namespace implementation details and people have demonstrated several
times that finding and understanding those code paths is difficult and
non-obvious.

Because of the call path detach pid is alwasy called with the
rtnl_lock held free_pid is not allowed to sleep, so the work to
unmounting proc is moved to a work queue.  This has the side benefit
of not blocking the entire world waiting for the unnecessary
rcu_barrier in deactivate_locked_super.

In the process of making the code clear and obvious this fixes a bug
reported by Gao feng <gaofeng@cn.fujitsu.com> where we would leak a
mount of proc during clone(CLONE_NEWPID|CLONE_NEWNET) if copy_pid_ns
succeeded and copy_net_ns failed.

Acked-by: default avatar"Serge E. Hallyn" <serge@hallyn.com>
Signed-off-by: default avatar"Eric W. Biederman" <ebiederm@xmission.com>
parent 17cf22c3
Loading
Loading
Loading
Loading
+0 −4
Original line number Diff line number Diff line
@@ -2590,10 +2590,6 @@ void proc_flush_task(struct task_struct *task)
		proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr,
					tgid->numbers[i].nr);
	}

	upid = &pid->numbers[pid->level];
	if (upid->nr == 1)
		pid_ns_release_proc(upid->ns);
}

static struct dentry *proc_pid_instantiate(struct inode *dir,
+0 −5
Original line number Diff line number Diff line
@@ -155,11 +155,6 @@ void __init proc_root_init(void)
	err = register_filesystem(&proc_fs_type);
	if (err)
		return;
	err = pid_ns_prepare_proc(&init_pid_ns);
	if (err) {
		unregister_filesystem(&proc_fs_type);
		return;
	}

	proc_self_init();
	proc_symlink("mounts", NULL, "self/mounts");
+2 −0
Original line number Diff line number Diff line
@@ -21,6 +21,7 @@ struct pid_namespace {
	struct kref kref;
	struct pidmap pidmap[PIDMAP_ENTRIES];
	int last_pid;
	int nr_hashed;
	struct task_struct *child_reaper;
	struct kmem_cache *pid_cachep;
	unsigned int level;
@@ -32,6 +33,7 @@ struct pid_namespace {
	struct bsd_acct_struct *bacct;
#endif
	struct user_namespace *user_ns;
	struct work_struct proc_work;
	kgid_t pid_gid;
	int hide_pid;
	int reboot;	/* group exit code if this pidns was rebooted */
+0 −2
Original line number Diff line number Diff line
@@ -1476,8 +1476,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
	if (p->io_context)
		exit_io_context(p);
bad_fork_cleanup_namespaces:
	if (unlikely(clone_flags & CLONE_NEWPID))
		pid_ns_release_proc(p->nsproxy->pid_ns);
	exit_task_namespaces(p);
bad_fork_cleanup_mm:
	if (p->mm)
+17 −4
Original line number Diff line number Diff line
@@ -36,6 +36,7 @@
#include <linux/pid_namespace.h>
#include <linux/init_task.h>
#include <linux/syscalls.h>
#include <linux/proc_fs.h>

#define pid_hashfn(nr, ns)	\
	hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift)
@@ -270,8 +271,12 @@ void free_pid(struct pid *pid)
	unsigned long flags;

	spin_lock_irqsave(&pidmap_lock, flags);
	for (i = 0; i <= pid->level; i++)
		hlist_del_rcu(&pid->numbers[i].pid_chain);
	for (i = 0; i <= pid->level; i++) {
		struct upid *upid = pid->numbers + i;
		hlist_del_rcu(&upid->pid_chain);
		if (--upid->ns->nr_hashed == 0)
			schedule_work(&upid->ns->proc_work);
	}
	spin_unlock_irqrestore(&pidmap_lock, flags);

	for (i = 0; i <= pid->level; i++)
@@ -293,6 +298,7 @@ struct pid *alloc_pid(struct pid_namespace *ns)
		goto out;

	tmp = ns;
	pid->level = ns->level;
	for (i = ns->level; i >= 0; i--) {
		nr = alloc_pidmap(tmp);
		if (nr < 0)
@@ -303,17 +309,23 @@ struct pid *alloc_pid(struct pid_namespace *ns)
		tmp = tmp->parent;
	}

	if (unlikely(is_child_reaper(pid))) {
		if (pid_ns_prepare_proc(ns))
			goto out_free;
	}

	get_pid_ns(ns);
	pid->level = ns->level;
	atomic_set(&pid->count, 1);
	for (type = 0; type < PIDTYPE_MAX; ++type)
		INIT_HLIST_HEAD(&pid->tasks[type]);

	upid = pid->numbers + ns->level;
	spin_lock_irq(&pidmap_lock);
	for ( ; upid >= pid->numbers; --upid)
	for ( ; upid >= pid->numbers; --upid) {
		hlist_add_head_rcu(&upid->pid_chain,
				&pid_hash[pid_hashfn(upid->nr, upid->ns)]);
		upid->ns->nr_hashed++;
	}
	spin_unlock_irq(&pidmap_lock);

out:
@@ -570,6 +582,7 @@ void __init pidmap_init(void)
	/* Reserve PID 0. We never call free_pidmap(0) */
	set_bit(0, init_pid_ns.pidmap[0].page);
	atomic_dec(&init_pid_ns.pidmap[0].nr_free);
	init_pid_ns.nr_hashed = 1;

	init_pid_ns.pid_cachep = KMEM_CACHE(pid,
			SLAB_HWCACHE_ALIGN | SLAB_PANIC);
Loading