Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 0e1e7c7a authored by Christoph Lameter's avatar Christoph Lameter Committed by Linus Torvalds
Browse files

Memoryless nodes: Use N_HIGH_MEMORY for cpusets



cpusets try to ensure that any node added to a cpuset's mems_allowed is
on-line and contains memory.  The assumption was that online nodes contained
memory.  Thus, it is possible to add memoryless nodes to a cpuset and then add
tasks to this cpuset.  This results in continuous series of oom-kill and
apparent system hang.

Change cpusets to use node_states[N_HIGH_MEMORY] [a.k.a.  node_memory_map] in
place of node_online_map when vetting memories.  Return error if admin
attempts to write a non-empty mems_allowed node mask containing only
memoryless-nodes.

Signed-off-by: default avatarLee Schermerhorn <lee.schermerhorn@hp.com>
Signed-off-by: default avatarBob Picco <bob.picco@hp.com>
Signed-off-by: default avatarNishanth Aravamudan <nacc@us.ibm.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@skynet.ie>
Signed-off-by: default avatarChristoph Lameter <clameter@sgi.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent 523b9458
Loading
Loading
Loading
Loading
+4 −3
Original line number Diff line number Diff line
@@ -35,7 +35,8 @@ CONTENTS:
----------------------

Cpusets provide a mechanism for assigning a set of CPUs and Memory
Nodes to a set of tasks.
Nodes to a set of tasks.   In this document "Memory Node" refers to
an on-line node that contains memory.

Cpusets constrain the CPU and Memory placement of tasks to only
the resources within a tasks current cpuset.  They form a nested
@@ -220,8 +221,8 @@ and name space for cpusets, with a minimum of additional kernel code.
The cpus and mems files in the root (top_cpuset) cpuset are
read-only.  The cpus file automatically tracks the value of
cpu_online_map using a CPU hotplug notifier, and the mems file
automatically tracks the value of node_online_map using the
cpuset_track_online_nodes() hook.
automatically tracks the value of node_states[N_MEMORY]--i.e.,
nodes with memory--using the cpuset_track_online_nodes() hook.


1.4 What are exclusive cpusets ?
+1 −1
Original line number Diff line number Diff line
@@ -93,7 +93,7 @@ static inline nodemask_t cpuset_mems_allowed(struct task_struct *p)
	return node_possible_map;
}

#define cpuset_current_mems_allowed (node_online_map)
#define cpuset_current_mems_allowed (node_states[N_HIGH_MEMORY])
static inline void cpuset_init_current_mems_allowed(void) {}
static inline void cpuset_update_task_memory_state(void) {}
#define cpuset_nodes_subset_current_mems_allowed(nodes) (1)
+38 −18
Original line number Diff line number Diff line
@@ -581,26 +581,28 @@ static void guarantee_online_cpus(const struct cpuset *cs, cpumask_t *pmask)

/*
 * Return in *pmask the portion of a cpusets's mems_allowed that
 * are online.  If none are online, walk up the cpuset hierarchy
 * until we find one that does have some online mems.  If we get
 * all the way to the top and still haven't found any online mems,
 * return node_online_map.
 * are online, with memory.  If none are online with memory, walk
 * up the cpuset hierarchy until we find one that does have some
 * online mems.  If we get all the way to the top and still haven't
 * found any online mems, return node_states[N_HIGH_MEMORY].
 *
 * One way or another, we guarantee to return some non-empty subset
 * of node_online_map.
 * of node_states[N_HIGH_MEMORY].
 *
 * Call with callback_mutex held.
 */

static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask)
{
	while (cs && !nodes_intersects(cs->mems_allowed, node_online_map))
	while (cs && !nodes_intersects(cs->mems_allowed,
					node_states[N_HIGH_MEMORY]))
		cs = cs->parent;
	if (cs)
		nodes_and(*pmask, cs->mems_allowed, node_online_map);
		nodes_and(*pmask, cs->mems_allowed,
					node_states[N_HIGH_MEMORY]);
	else
		*pmask = node_online_map;
	BUG_ON(!nodes_intersects(*pmask, node_online_map));
		*pmask = node_states[N_HIGH_MEMORY];
	BUG_ON(!nodes_intersects(*pmask, node_states[N_HIGH_MEMORY]));
}

/**
@@ -924,7 +926,10 @@ static int update_nodemask(struct cpuset *cs, char *buf)
	int fudge;
	int retval;

	/* top_cpuset.mems_allowed tracks node_online_map; it's read-only */
	/*
	 * top_cpuset.mems_allowed tracks node_stats[N_HIGH_MEMORY];
	 * it's read-only
	 */
	if (cs == &top_cpuset)
		return -EACCES;

@@ -941,8 +946,21 @@ static int update_nodemask(struct cpuset *cs, char *buf)
		retval = nodelist_parse(buf, trialcs.mems_allowed);
		if (retval < 0)
			goto done;
		if (!nodes_intersects(trialcs.mems_allowed,
						node_states[N_HIGH_MEMORY])) {
			/*
			 * error if only memoryless nodes specified.
			 */
			retval = -ENOSPC;
			goto done;
		}
	}
	nodes_and(trialcs.mems_allowed, trialcs.mems_allowed, node_online_map);
	/*
	 * Exclude memoryless nodes.  We know that trialcs.mems_allowed
	 * contains at least one node with memory.
	 */
	nodes_and(trialcs.mems_allowed, trialcs.mems_allowed,
						node_states[N_HIGH_MEMORY]);
	oldmem = cs->mems_allowed;
	if (nodes_equal(oldmem, trialcs.mems_allowed)) {
		retval = 0;		/* Too easy - nothing to do */
@@ -2098,8 +2116,9 @@ static void guarantee_online_cpus_mems_in_subtree(const struct cpuset *cur)

/*
 * The cpus_allowed and mems_allowed nodemasks in the top_cpuset track
 * cpu_online_map and node_online_map.  Force the top cpuset to track
 * whats online after any CPU or memory node hotplug or unplug event.
 * cpu_online_map and node_states[N_HIGH_MEMORY].  Force the top cpuset to
 * track what's online after any CPU or memory node hotplug or unplug
 * event.
 *
 * To ensure that we don't remove a CPU or node from the top cpuset
 * that is currently in use by a child cpuset (which would violate
@@ -2119,7 +2138,7 @@ static void common_cpu_mem_hotplug_unplug(void)

	guarantee_online_cpus_mems_in_subtree(&top_cpuset);
	top_cpuset.cpus_allowed = cpu_online_map;
	top_cpuset.mems_allowed = node_online_map;
	top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];

	mutex_unlock(&callback_mutex);
	mutex_unlock(&manage_mutex);
@@ -2147,8 +2166,9 @@ static int cpuset_handle_cpuhp(struct notifier_block *nb,

#ifdef CONFIG_MEMORY_HOTPLUG
/*
 * Keep top_cpuset.mems_allowed tracking node_online_map.
 * Call this routine anytime after you change node_online_map.
 * Keep top_cpuset.mems_allowed tracking node_states[N_HIGH_MEMORY].
 * Call this routine anytime after you change
 * node_states[N_HIGH_MEMORY].
 * See also the previous routine cpuset_handle_cpuhp().
 */

@@ -2167,7 +2187,7 @@ void cpuset_track_online_nodes(void)
void __init cpuset_init_smp(void)
{
	top_cpuset.cpus_allowed = cpu_online_map;
	top_cpuset.mems_allowed = node_online_map;
	top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];

	hotcpu_notifier(cpuset_handle_cpuhp, 0);
}
@@ -2309,7 +2329,7 @@ void cpuset_init_current_mems_allowed(void)
 *
 * Description: Returns the nodemask_t mems_allowed of the cpuset
 * attached to the specified @tsk.  Guaranteed to return some non-empty
 * subset of node_online_map, even if this means going outside the
 * subset of node_states[N_HIGH_MEMORY], even if this means going outside the
 * tasks cpuset.
 **/