Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 9e781440 authored by KAMEZAWA Hiroyuki's avatar KAMEZAWA Hiroyuki Committed by Linus Torvalds
Browse files

hold task->mempolicy while numa_maps scans.



  /proc/<pid>/numa_maps scans vmas and shows their mempolicy under
  mmap_sem. It sometimes accesses task->mempolicy which can
  be freed without mmap_sem and numa_maps can show some
  garbage while scanning.

This patch tries to take reference count of task->mempolicy at reading
numa_maps before calling get_vma_policy(). By this, task->mempolicy
will not be freed until numa_maps reaches its end.

V2->v3
  -  updated comments to be more verbose.
  -  removed task_lock() in numa_maps code.
V1->V2
  -  access task->mempolicy only once and remember it.  Because kernel/exit.c
     can overwrite it.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: David Rientjes <rientjes@google.com>
Acked-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 3b641bf4
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -12,6 +12,7 @@
#include <linux/sched.h>
#include <linux/proc_fs.h>
struct  ctl_table_header;
struct  mempolicy;

extern struct proc_dir_entry proc_root;
#ifdef CONFIG_PROC_SYSCTL
@@ -74,6 +75,9 @@ struct proc_maps_private {
#ifdef CONFIG_MMU
	struct vm_area_struct *tail_vma;
#endif
#ifdef CONFIG_NUMA
	struct mempolicy *task_mempolicy;
#endif
};

void proc_init_inodecache(void);
+47 −3
Original line number Diff line number Diff line
@@ -90,10 +90,55 @@ static void pad_len_spaces(struct seq_file *m, int len)
	seq_printf(m, "%*c", len, ' ');
}

#ifdef CONFIG_NUMA
/*
 * These functions are for numa_maps but called in generic **maps seq_file
 * ->start(), ->stop() ops.
 *
 * numa_maps scans all vmas under mmap_sem and checks their mempolicy.
 * Each mempolicy object is controlled by reference counting. The problem here
 * is how to avoid accessing dead mempolicy object.
 *
 * Because we're holding mmap_sem while reading seq_file, it's safe to access
 * each vma's mempolicy, no vma object will ever drop its ref to mempolicy.
 *
 * A task's mempolicy (task->mempolicy) has different behavior. task->mempolicy
 * is set and replaced under mmap_sem but unrefed and cleared under task_lock().
 * So, without task_lock(), we cannot trust get_vma_policy() because we cannot
 * guarantee the task never exits under us. But taking task_lock() around
 * get_vma_policy() causes a lock order problem.
 *
 * To access task->mempolicy without lock, we hold a reference count of an
 * object pointed by task->mempolicy and remember it. This will guarantee
 * that task->mempolicy points to an alive object or NULL in numa_maps accesses.
 */
static void hold_task_mempolicy(struct proc_maps_private *priv)
{
	struct task_struct *tsk = priv->task;
	struct mempolicy *pol;

	/*
	 * Read task->mempolicy exactly once, under task_lock(), and take a
	 * reference on that same pointer before the lock is dropped.
	 */
	task_lock(tsk);
	pol = tsk->mempolicy;
	mpol_get(pol);
	/* Remember what we pinned so release_task_mempolicy() can put it. */
	priv->task_mempolicy = pol;
	task_unlock(tsk);
}
/*
 * Counterpart of hold_task_mempolicy(): hand the mempolicy pointer that was
 * remembered in @priv to mpol_put().
 */
static void release_task_mempolicy(struct proc_maps_private *priv)
{
	struct mempolicy *held = priv->task_mempolicy;

	mpol_put(held);
}
#else
/* !CONFIG_NUMA: no task mempolicy is tracked in @priv, so this is a no-op. */
static void hold_task_mempolicy(struct proc_maps_private *priv)
{
}
/* !CONFIG_NUMA: nothing was held for @priv, so there is nothing to release. */
static void release_task_mempolicy(struct proc_maps_private *priv)
{
}
#endif

static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma)
{
	if (vma && vma != priv->tail_vma) {
		struct mm_struct *mm = vma->vm_mm;
		release_task_mempolicy(priv);
		up_read(&mm->mmap_sem);
		mmput(mm);
	}
@@ -132,7 +177,7 @@ static void *m_start(struct seq_file *m, loff_t *pos)

	tail_vma = get_gate_vma(priv->task->mm);
	priv->tail_vma = tail_vma;

	hold_task_mempolicy(priv);
	/* Start with last addr hint */
	vma = find_vma(mm, last_addr);
	if (last_addr && vma) {
@@ -159,6 +204,7 @@ static void *m_start(struct seq_file *m, loff_t *pos)
	if (vma)
		return vma;

	release_task_mempolicy(priv);
	/* End of vmas has been reached */
	m->version = (tail_vma != NULL)? 0: -1UL;
	up_read(&mm->mmap_sem);
@@ -1178,11 +1224,9 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
	walk.private = md;
	walk.mm = mm;

	task_lock(task);
	pol = get_vma_policy(task, vma, vma->vm_start);
	mpol_to_str(buffer, sizeof(buffer), pol, 0);
	mpol_cond_put(pol);
	task_unlock(task);

	seq_printf(m, "%08lx %s", vma->vm_start, buffer);