
Commit 2756d373 authored by Linus Torvalds
Pull cgroup update from Tejun Heo:
 "cpuset got simplified a bit.  cgroup core got a fix on unified
  hierarchy and grew some effective css related interfaces which will be
  used for blkio support for writeback IO traffic which is currently
  being worked on"

* 'for-3.19' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  cgroup: implement cgroup_get_e_css()
  cgroup: add cgroup_subsys->css_e_css_changed()
  cgroup: add cgroup_subsys->css_released()
  cgroup: fix the async css offline wait logic in cgroup_subtree_control_write()
  cgroup: restructure child_subsys_mask handling in cgroup_subtree_control_write()
  cgroup: separate out cgroup_calc_child_subsys_mask() from cgroup_refresh_child_subsys_mask()
  cpuset: lock vs unlock typo
  cpuset: simplify cpuset_node_allowed API
  cpuset: convert callback_mutex to a spinlock
parents 4e8790f7 eeecbd19
include/linux/cgroup.h  +4 −0
@@ -638,8 +638,10 @@ struct cgroup_subsys {
	struct cgroup_subsys_state *(*css_alloc)(struct cgroup_subsys_state *parent_css);
	int (*css_online)(struct cgroup_subsys_state *css);
	void (*css_offline)(struct cgroup_subsys_state *css);
	void (*css_released)(struct cgroup_subsys_state *css);
	void (*css_free)(struct cgroup_subsys_state *css);
	void (*css_reset)(struct cgroup_subsys_state *css);
	void (*css_e_css_changed)(struct cgroup_subsys_state *css);

	int (*can_attach)(struct cgroup_subsys_state *css,
			  struct cgroup_taskset *tset);
@@ -934,6 +936,8 @@ void css_task_iter_end(struct css_task_iter *it);
int cgroup_attach_task_all(struct task_struct *from, struct task_struct *);
int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from);

struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgroup,
					     struct cgroup_subsys *ss);
struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry,
						       struct cgroup_subsys *ss);
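
(Not part of the diff above: a minimal sketch of how a controller could wire up the two new optional cgroup_subsys callbacks. The example_* names are hypothetical and do not appear in this commit.)

#include <linux/cgroup.h>

/* Invoked after the css's refcount reaches zero, before ->css_free() runs. */
static void example_css_released(struct cgroup_subsys_state *css)
{
	/* e.g. unlink the css from controller-private lookup structures */
}

/* Invoked when an effective css visible from this css's cgroup may have changed. */
static void example_css_e_css_changed(struct cgroup_subsys_state *css)
{
	/* e.g. invalidate cached effective-css pointers for this cgroup */
}

struct cgroup_subsys example_cgrp_subsys = {
	.css_released		= example_css_released,
	.css_e_css_changed	= example_css_e_css_changed,
	/* .css_alloc, .css_free and the rest omitted in this sketch */
};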

include/linux/cpuset.h  +7 −30
@@ -48,29 +48,16 @@ extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
void cpuset_init_current_mems_allowed(void);
int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask);

extern int __cpuset_node_allowed_softwall(int node, gfp_t gfp_mask);
extern int __cpuset_node_allowed_hardwall(int node, gfp_t gfp_mask);
extern int __cpuset_node_allowed(int node, gfp_t gfp_mask);

static inline int cpuset_node_allowed_softwall(int node, gfp_t gfp_mask)
static inline int cpuset_node_allowed(int node, gfp_t gfp_mask)
{
	return nr_cpusets() <= 1 ||
		__cpuset_node_allowed_softwall(node, gfp_mask);
	return nr_cpusets() <= 1 || __cpuset_node_allowed(node, gfp_mask);
}

static inline int cpuset_node_allowed_hardwall(int node, gfp_t gfp_mask)
static inline int cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
{
	return nr_cpusets() <= 1 ||
		__cpuset_node_allowed_hardwall(node, gfp_mask);
}

static inline int cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask)
{
	return cpuset_node_allowed_softwall(zone_to_nid(z), gfp_mask);
}

static inline int cpuset_zone_allowed_hardwall(struct zone *z, gfp_t gfp_mask)
{
	return cpuset_node_allowed_hardwall(zone_to_nid(z), gfp_mask);
	return cpuset_node_allowed(zone_to_nid(z), gfp_mask);
}

extern int cpuset_mems_allowed_intersects(const struct task_struct *tsk1,
@@ -179,22 +166,12 @@ static inline int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask)
	return 1;
}

static inline int cpuset_node_allowed_softwall(int node, gfp_t gfp_mask)
{
	return 1;
}

static inline int cpuset_node_allowed_hardwall(int node, gfp_t gfp_mask)
{
	return 1;
}

static inline int cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask)
static inline int cpuset_node_allowed(int node, gfp_t gfp_mask)
{
	return 1;
}

static inline int cpuset_zone_allowed_hardwall(struct zone *z, gfp_t gfp_mask)
static inline int cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
{
	return 1;
}
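
(Illustration, not part of the diff: after this change there is a single entry point, and hardwall behaviour is selected by __GFP_HARDWALL in gfp_mask rather than by picking a *_hardwall variant. example_node_usable is a hypothetical caller.)

#include <linux/cpuset.h>
#include <linux/gfp.h>
#include <linux/types.h>

/* Returns true if @nid may be used for this allocation under cpuset rules. */
static bool example_node_usable(int nid, gfp_t gfp_mask)
{
	/*
	 * Allocations carrying __GFP_HARDWALL (e.g. GFP_USER) are confined to
	 * the task's own cpuset; plain GFP_KERNEL allocations may fall back to
	 * the nearest hardwalled ancestor cpuset.
	 */
	return cpuset_node_allowed(nid, gfp_mask);
}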
kernel/cgroup.c  +121 −54
@@ -277,6 +277,10 @@ static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
	if (!(cgrp->root->subsys_mask & (1 << ss->id)))
		return NULL;

	/*
	 * This function is used while updating css associations and thus
	 * can't test the csses directly.  Use ->child_subsys_mask.
	 */
	while (cgroup_parent(cgrp) &&
	       !(cgroup_parent(cgrp)->child_subsys_mask & (1 << ss->id)))
		cgrp = cgroup_parent(cgrp);
@@ -284,6 +288,39 @@ static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
	return cgroup_css(cgrp, ss);
}

/**
 * cgroup_get_e_css - get a cgroup's effective css for the specified subsystem
 * @cgrp: the cgroup of interest
 * @ss: the subsystem of interest
 *
 * Find and get the effective css of @cgrp for @ss.  The effective css is
 * defined as the matching css of the nearest ancestor including self which
 * has @ss enabled.  If @ss is not mounted on the hierarchy @cgrp is on,
 * the root css is returned, so this function always returns a valid css.
 * The returned css must be put using css_put().
 */
struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgrp,
					     struct cgroup_subsys *ss)
{
	struct cgroup_subsys_state *css;

	rcu_read_lock();

	do {
		css = cgroup_css(cgrp, ss);

		if (css && css_tryget_online(css))
			goto out_unlock;
		cgrp = cgroup_parent(cgrp);
	} while (cgrp);

	css = init_css_set.subsys[ss->id];
	css_get(css);
out_unlock:
	rcu_read_unlock();
	return css;
}

/* convenient tests for these bits */
static inline bool cgroup_is_dead(const struct cgroup *cgrp)
{
@@ -1019,31 +1056,30 @@ static void cgroup_put(struct cgroup *cgrp)
}

/**
 * cgroup_refresh_child_subsys_mask - update child_subsys_mask
 * cgroup_calc_child_subsys_mask - calculate child_subsys_mask
 * @cgrp: the target cgroup
 * @subtree_control: the new subtree_control mask to consider
 *
 * On the default hierarchy, a subsystem may request other subsystems to be
 * enabled together through its ->depends_on mask.  In such cases, more
 * subsystems than specified in "cgroup.subtree_control" may be enabled.
 *
 * This function determines which subsystems need to be enabled given the
 * current @cgrp->subtree_control and records it in
 * @cgrp->child_subsys_mask.  The resulting mask is always a superset of
 * @cgrp->subtree_control and follows the usual hierarchy rules.
 * This function calculates which subsystems need to be enabled if
 * @subtree_control is to be applied to @cgrp.  The returned mask is always
 * a superset of @subtree_control and follows the usual hierarchy rules.
 */
static void cgroup_refresh_child_subsys_mask(struct cgroup *cgrp)
static unsigned int cgroup_calc_child_subsys_mask(struct cgroup *cgrp,
						  unsigned int subtree_control)
{
	struct cgroup *parent = cgroup_parent(cgrp);
	unsigned int cur_ss_mask = cgrp->subtree_control;
	unsigned int cur_ss_mask = subtree_control;
	struct cgroup_subsys *ss;
	int ssid;

	lockdep_assert_held(&cgroup_mutex);

	if (!cgroup_on_dfl(cgrp)) {
		cgrp->child_subsys_mask = cur_ss_mask;
		return;
	}
	if (!cgroup_on_dfl(cgrp))
		return cur_ss_mask;

	while (true) {
		unsigned int new_ss_mask = cur_ss_mask;
@@ -1067,7 +1103,20 @@ static void cgroup_refresh_child_subsys_mask(struct cgroup *cgrp)
		cur_ss_mask = new_ss_mask;
	}

	cgrp->child_subsys_mask = cur_ss_mask;
	return cur_ss_mask;
}

/**
 * cgroup_refresh_child_subsys_mask - update child_subsys_mask
 * @cgrp: the target cgroup
 *
 * Update @cgrp->child_subsys_mask according to the current
 * @cgrp->subtree_control using cgroup_calc_child_subsys_mask().
 */
static void cgroup_refresh_child_subsys_mask(struct cgroup *cgrp)
{
	cgrp->child_subsys_mask =
		cgroup_calc_child_subsys_mask(cgrp, cgrp->subtree_control);
}

/**
@@ -2641,7 +2690,7 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
					    loff_t off)
{
	unsigned int enable = 0, disable = 0;
	unsigned int css_enable, css_disable, old_ctrl, new_ctrl;
	unsigned int css_enable, css_disable, old_sc, new_sc, old_ss, new_ss;
	struct cgroup *cgrp, *child;
	struct cgroup_subsys *ss;
	char *tok;
@@ -2693,36 +2742,6 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
				ret = -ENOENT;
				goto out_unlock;
			}

			/*
			 * @ss is already enabled through dependency and
			 * we'll just make it visible.  Skip draining.
			 */
			if (cgrp->child_subsys_mask & (1 << ssid))
				continue;

			/*
			 * Because css offlining is asynchronous, userland
			 * might try to re-enable the same controller while
			 * the previous instance is still around.  In such
			 * cases, wait till it's gone using offline_waitq.
			 */
			cgroup_for_each_live_child(child, cgrp) {
				DEFINE_WAIT(wait);

				if (!cgroup_css(child, ss))
					continue;

				cgroup_get(child);
				prepare_to_wait(&child->offline_waitq, &wait,
						TASK_UNINTERRUPTIBLE);
				cgroup_kn_unlock(of->kn);
				schedule();
				finish_wait(&child->offline_waitq, &wait);
				cgroup_put(child);

				return restart_syscall();
			}
		} else if (disable & (1 << ssid)) {
			if (!(cgrp->subtree_control & (1 << ssid))) {
				disable &= ~(1 << ssid);
@@ -2758,18 +2777,47 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
	 * subsystems than specified may need to be enabled or disabled
	 * depending on subsystem dependencies.
	 */
	cgrp->subtree_control |= enable;
	cgrp->subtree_control &= ~disable;
	old_sc = cgrp->subtree_control;
	old_ss = cgrp->child_subsys_mask;
	new_sc = (old_sc | enable) & ~disable;
	new_ss = cgroup_calc_child_subsys_mask(cgrp, new_sc);

	old_ctrl = cgrp->child_subsys_mask;
	cgroup_refresh_child_subsys_mask(cgrp);
	new_ctrl = cgrp->child_subsys_mask;

	css_enable = ~old_ctrl & new_ctrl;
	css_disable = old_ctrl & ~new_ctrl;
	css_enable = ~old_ss & new_ss;
	css_disable = old_ss & ~new_ss;
	enable |= css_enable;
	disable |= css_disable;

	/*
	 * Because css offlining is asynchronous, userland might try to
	 * re-enable the same controller while the previous instance is
	 * still around.  In such cases, wait till it's gone using
	 * offline_waitq.
	 */
	for_each_subsys(ss, ssid) {
		if (!(css_enable & (1 << ssid)))
			continue;

		cgroup_for_each_live_child(child, cgrp) {
			DEFINE_WAIT(wait);

			if (!cgroup_css(child, ss))
				continue;

			cgroup_get(child);
			prepare_to_wait(&child->offline_waitq, &wait,
					TASK_UNINTERRUPTIBLE);
			cgroup_kn_unlock(of->kn);
			schedule();
			finish_wait(&child->offline_waitq, &wait);
			cgroup_put(child);

			return restart_syscall();
		}
	}

	cgrp->subtree_control = new_sc;
	cgrp->child_subsys_mask = new_ss;

	/*
	 * Create new csses or make the existing ones visible.  A css is
	 * created invisible if it's being implicitly enabled through
@@ -2825,6 +2873,24 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
		}
	}

	/*
	 * The effective csses of all the descendants (excluding @cgrp) may
	 * have changed.  Subsystems can optionally subscribe to this event
	 * by implementing ->css_e_css_changed() which is invoked if any of
	 * the effective csses seen from the css's cgroup may have changed.
	 */
	for_each_subsys(ss, ssid) {
		struct cgroup_subsys_state *this_css = cgroup_css(cgrp, ss);
		struct cgroup_subsys_state *css;

		if (!ss->css_e_css_changed || !this_css)
			continue;

		css_for_each_descendant_pre(css, this_css)
			if (css != this_css)
				ss->css_e_css_changed(css);
	}

	kernfs_activate(cgrp->kn);
	ret = 0;
out_unlock:
@@ -2832,9 +2898,8 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
	return ret ?: nbytes;

err_undo_css:
	cgrp->subtree_control &= ~enable;
	cgrp->subtree_control |= disable;
	cgroup_refresh_child_subsys_mask(cgrp);
	cgrp->subtree_control = old_sc;
	cgrp->child_subsys_mask = old_ss;

	for_each_subsys(ss, ssid) {
		if (!(enable & (1 << ssid)))
@@ -4370,6 +4435,8 @@ static void css_release_work_fn(struct work_struct *work)
	if (ss) {
		/* css release path */
		cgroup_idr_remove(&ss->css_idr, css->id);
		if (ss->css_released)
			ss->css_released(css);
	} else {
		/* cgroup release path */
		cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
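
(Illustration, not part of the diff: cgroup_get_e_css() is the effective-css interface the pull message says the writeback/blkio work will build on. example_use_effective_css is a hypothetical caller.)

#include <linux/cgroup.h>

/* Pin the effective css of @cgrp for @ss, use it, then drop the reference. */
static void example_use_effective_css(struct cgroup *cgrp,
				      struct cgroup_subsys *ss)
{
	struct cgroup_subsys_state *css;

	/* Always returns a valid, referenced css; falls back to the root css. */
	css = cgroup_get_e_css(cgrp, ss);

	/* ... associate per-css state, tag writeback IO, etc. ... */

	css_put(css);	/* release the reference taken by cgroup_get_e_css() */
}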
kernel/cpuset.c  +57 −105
@@ -248,34 +248,34 @@ static struct cpuset top_cpuset = {
		if (is_cpuset_online(((des_cs) = css_cs((pos_css)))))

/*
 * There are two global mutexes guarding cpuset structures - cpuset_mutex
 * and callback_mutex.  The latter may nest inside the former.  We also
 * require taking task_lock() when dereferencing a task's cpuset pointer.
 * See "The task_lock() exception", at the end of this comment.
 * There are two global locks guarding cpuset structures - cpuset_mutex and
 * callback_lock. We also require taking task_lock() when dereferencing a
 * task's cpuset pointer. See "The task_lock() exception", at the end of this
 * comment.
 *
 * A task must hold both mutexes to modify cpusets.  If a task holds
 * A task must hold both locks to modify cpusets.  If a task holds
 * cpuset_mutex, then it blocks others wanting that mutex, ensuring that it
 * is the only task able to also acquire callback_mutex and be able to
 * is the only task able to also acquire callback_lock and be able to
 * modify cpusets.  It can perform various checks on the cpuset structure
 * first, knowing nothing will change.  It can also allocate memory while
 * just holding cpuset_mutex.  While it is performing these checks, various
 * callback routines can briefly acquire callback_mutex to query cpusets.
 * Once it is ready to make the changes, it takes callback_mutex, blocking
 * callback routines can briefly acquire callback_lock to query cpusets.
 * Once it is ready to make the changes, it takes callback_lock, blocking
 * everyone else.
 *
 * Calls to the kernel memory allocator can not be made while holding
 * callback_mutex, as that would risk double tripping on callback_mutex
 * callback_lock, as that would risk double tripping on callback_lock
 * from one of the callbacks into the cpuset code from within
 * __alloc_pages().
 *
 * If a task is only holding callback_mutex, then it has read-only
 * If a task is only holding callback_lock, then it has read-only
 * access to cpusets.
 *
 * Now, the task_struct fields mems_allowed and mempolicy may be changed
 * by other task, we use alloc_lock in the task_struct fields to protect
 * them.
 *
 * The cpuset_common_file_read() handlers only hold callback_mutex across
 * The cpuset_common_file_read() handlers only hold callback_lock across
 * small pieces of code, such as when reading out possibly multi-word
 * cpumasks and nodemasks.
 *
@@ -284,7 +284,7 @@ static struct cpuset top_cpuset = {
 */

static DEFINE_MUTEX(cpuset_mutex);
static DEFINE_MUTEX(callback_mutex);
static DEFINE_SPINLOCK(callback_lock);

/*
 * CPU / memory hotplug is handled asynchronously.
@@ -329,7 +329,7 @@ static struct file_system_type cpuset_fs_type = {
 * One way or another, we guarantee to return some non-empty subset
 * of cpu_online_mask.
 *
 * Call with callback_mutex held.
 * Call with callback_lock or cpuset_mutex held.
 */
static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask)
{
@@ -347,7 +347,7 @@ static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask)
 * One way or another, we guarantee to return some non-empty subset
 * of node_states[N_MEMORY].
 *
 * Call with callback_mutex held.
 * Call with callback_lock or cpuset_mutex held.
 */
static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask)
{
@@ -359,7 +359,7 @@ static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask)
/*
 * update task's spread flag if cpuset's page/slab spread flag is set
 *
 * Called with callback_mutex/cpuset_mutex held
 * Call with callback_lock or cpuset_mutex held.
 */
static void cpuset_update_task_spread_flag(struct cpuset *cs,
					struct task_struct *tsk)
@@ -886,9 +886,9 @@ static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus)
			continue;
		rcu_read_unlock();

		mutex_lock(&callback_mutex);
		spin_lock_irq(&callback_lock);
		cpumask_copy(cp->effective_cpus, new_cpus);
		mutex_unlock(&callback_mutex);
		spin_unlock_irq(&callback_lock);

		WARN_ON(!cgroup_on_dfl(cp->css.cgroup) &&
			!cpumask_equal(cp->cpus_allowed, cp->effective_cpus));
@@ -953,9 +953,9 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
	if (retval < 0)
		return retval;

	mutex_lock(&callback_mutex);
	spin_lock_irq(&callback_lock);
	cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed);
	mutex_unlock(&callback_mutex);
	spin_unlock_irq(&callback_lock);

	/* use trialcs->cpus_allowed as a temp variable */
	update_cpumasks_hier(cs, trialcs->cpus_allowed);
@@ -1142,9 +1142,9 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems)
			continue;
		rcu_read_unlock();

		mutex_lock(&callback_mutex);
		spin_lock_irq(&callback_lock);
		cp->effective_mems = *new_mems;
		mutex_unlock(&callback_mutex);
		spin_unlock_irq(&callback_lock);

		WARN_ON(!cgroup_on_dfl(cp->css.cgroup) &&
			!nodes_equal(cp->mems_allowed, cp->effective_mems));
@@ -1165,7 +1165,7 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems)
 * mempolicies and if the cpuset is marked 'memory_migrate',
 * migrate the tasks pages to the new memory.
 *
 * Call with cpuset_mutex held.  May take callback_mutex during call.
 * Call with cpuset_mutex held. May take callback_lock during call.
 * Will take tasklist_lock, scan tasklist for tasks in cpuset cs,
 * lock each such tasks mm->mmap_sem, scan its vma's and rebind
 * their mempolicies to the cpusets new mems_allowed.
@@ -1212,9 +1212,9 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
	if (retval < 0)
		goto done;

	mutex_lock(&callback_mutex);
	spin_lock_irq(&callback_lock);
	cs->mems_allowed = trialcs->mems_allowed;
	mutex_unlock(&callback_mutex);
	spin_unlock_irq(&callback_lock);

	/* use trialcs->mems_allowed as a temp variable */
	update_nodemasks_hier(cs, &cs->mems_allowed);
@@ -1305,9 +1305,9 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
	spread_flag_changed = ((is_spread_slab(cs) != is_spread_slab(trialcs))
			|| (is_spread_page(cs) != is_spread_page(trialcs)));

	mutex_lock(&callback_mutex);
	spin_lock_irq(&callback_lock);
	cs->flags = trialcs->flags;
	mutex_unlock(&callback_mutex);
	spin_unlock_irq(&callback_lock);

	if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed)
		rebuild_sched_domains_locked();
@@ -1714,7 +1714,7 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v)
	count = seq_get_buf(sf, &buf);
	s = buf;

	mutex_lock(&callback_mutex);
	spin_lock_irq(&callback_lock);

	switch (type) {
	case FILE_CPULIST:
@@ -1741,7 +1741,7 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v)
		seq_commit(sf, -1);
	}
out_unlock:
	mutex_unlock(&callback_mutex);
	spin_unlock_irq(&callback_lock);
	return ret;
}

@@ -1958,12 +1958,12 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)

	cpuset_inc();

	mutex_lock(&callback_mutex);
	spin_lock_irq(&callback_lock);
	if (cgroup_on_dfl(cs->css.cgroup)) {
		cpumask_copy(cs->effective_cpus, parent->effective_cpus);
		cs->effective_mems = parent->effective_mems;
	}
	mutex_unlock(&callback_mutex);
	spin_unlock_irq(&callback_lock);

	if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags))
		goto out_unlock;
@@ -1990,10 +1990,10 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
	}
	rcu_read_unlock();

	mutex_lock(&callback_mutex);
	spin_lock_irq(&callback_lock);
	cs->mems_allowed = parent->mems_allowed;
	cpumask_copy(cs->cpus_allowed, parent->cpus_allowed);
	mutex_unlock(&callback_mutex);
	spin_unlock_irq(&callback_lock);
out_unlock:
	mutex_unlock(&cpuset_mutex);
	return 0;
@@ -2032,7 +2032,7 @@ static void cpuset_css_free(struct cgroup_subsys_state *css)
static void cpuset_bind(struct cgroup_subsys_state *root_css)
{
	mutex_lock(&cpuset_mutex);
	mutex_lock(&callback_mutex);
	spin_lock_irq(&callback_lock);

	if (cgroup_on_dfl(root_css->cgroup)) {
		cpumask_copy(top_cpuset.cpus_allowed, cpu_possible_mask);
@@ -2043,7 +2043,7 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css)
		top_cpuset.mems_allowed = top_cpuset.effective_mems;
	}

	mutex_unlock(&callback_mutex);
	spin_unlock_irq(&callback_lock);
	mutex_unlock(&cpuset_mutex);
}

@@ -2128,12 +2128,12 @@ hotplug_update_tasks_legacy(struct cpuset *cs,
{
	bool is_empty;

	mutex_lock(&callback_mutex);
	spin_lock_irq(&callback_lock);
	cpumask_copy(cs->cpus_allowed, new_cpus);
	cpumask_copy(cs->effective_cpus, new_cpus);
	cs->mems_allowed = *new_mems;
	cs->effective_mems = *new_mems;
	mutex_unlock(&callback_mutex);
	spin_unlock_irq(&callback_lock);

	/*
	 * Don't call update_tasks_cpumask() if the cpuset becomes empty,
@@ -2170,10 +2170,10 @@ hotplug_update_tasks(struct cpuset *cs,
	if (nodes_empty(*new_mems))
		*new_mems = parent_cs(cs)->effective_mems;

	mutex_lock(&callback_mutex);
	spin_lock_irq(&callback_lock);
	cpumask_copy(cs->effective_cpus, new_cpus);
	cs->effective_mems = *new_mems;
	mutex_unlock(&callback_mutex);
	spin_unlock_irq(&callback_lock);

	if (cpus_updated)
		update_tasks_cpumask(cs);
@@ -2259,21 +2259,21 @@ static void cpuset_hotplug_workfn(struct work_struct *work)

	/* synchronize cpus_allowed to cpu_active_mask */
	if (cpus_updated) {
		mutex_lock(&callback_mutex);
		spin_lock_irq(&callback_lock);
		if (!on_dfl)
			cpumask_copy(top_cpuset.cpus_allowed, &new_cpus);
		cpumask_copy(top_cpuset.effective_cpus, &new_cpus);
		mutex_unlock(&callback_mutex);
		spin_unlock_irq(&callback_lock);
		/* we don't mess with cpumasks of tasks in top_cpuset */
	}

	/* synchronize mems_allowed to N_MEMORY */
	if (mems_updated) {
		mutex_lock(&callback_mutex);
		spin_lock_irq(&callback_lock);
		if (!on_dfl)
			top_cpuset.mems_allowed = new_mems;
		top_cpuset.effective_mems = new_mems;
		mutex_unlock(&callback_mutex);
		spin_unlock_irq(&callback_lock);
		update_tasks_nodemask(&top_cpuset);
	}

@@ -2366,11 +2366,13 @@ void __init cpuset_init_smp(void)

void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
{
	mutex_lock(&callback_mutex);
	unsigned long flags;

	spin_lock_irqsave(&callback_lock, flags);
	rcu_read_lock();
	guarantee_online_cpus(task_cs(tsk), pmask);
	rcu_read_unlock();
	mutex_unlock(&callback_mutex);
	spin_unlock_irqrestore(&callback_lock, flags);
}

void cpuset_cpus_allowed_fallback(struct task_struct *tsk)
@@ -2416,12 +2418,13 @@ void cpuset_init_current_mems_allowed(void)
nodemask_t cpuset_mems_allowed(struct task_struct *tsk)
{
	nodemask_t mask;
	unsigned long flags;

	mutex_lock(&callback_mutex);
	spin_lock_irqsave(&callback_lock, flags);
	rcu_read_lock();
	guarantee_online_mems(task_cs(tsk), &mask);
	rcu_read_unlock();
	mutex_unlock(&callback_mutex);
	spin_unlock_irqrestore(&callback_lock, flags);

	return mask;
}
@@ -2440,7 +2443,7 @@ int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask)
/*
 * nearest_hardwall_ancestor() - Returns the nearest mem_exclusive or
 * mem_hardwall ancestor to the specified cpuset.  Call holding
 * callback_mutex.  If no ancestor is mem_exclusive or mem_hardwall
 * callback_lock.  If no ancestor is mem_exclusive or mem_hardwall
 * (an unusual configuration), then returns the root cpuset.
 */
static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
@@ -2451,7 +2454,7 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
}

/**
 * cpuset_node_allowed_softwall - Can we allocate on a memory node?
 * cpuset_node_allowed - Can we allocate on a memory node?
 * @node: is this an allowed node?
 * @gfp_mask: memory allocation flags
 *
@@ -2463,13 +2466,6 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
 * flag, yes.
 * Otherwise, no.
 *
 * If __GFP_HARDWALL is set, cpuset_node_allowed_softwall() reduces to
 * cpuset_node_allowed_hardwall().  Otherwise, cpuset_node_allowed_softwall()
 * might sleep, and might allow a node from an enclosing cpuset.
 *
 * cpuset_node_allowed_hardwall() only handles the simpler case of hardwall
 * cpusets, and never sleeps.
 *
 * The __GFP_THISNODE placement logic is really handled elsewhere,
 * by forcibly using a zonelist starting at a specified node, and by
 * (in get_page_from_freelist()) refusing to consider the zones for
@@ -2482,13 +2478,12 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
 * GFP_KERNEL allocations are not so marked, so can escape to the
 * nearest enclosing hardwalled ancestor cpuset.
 *
 * Scanning up parent cpusets requires callback_mutex.  The
 * Scanning up parent cpusets requires callback_lock.  The
 * __alloc_pages() routine only calls here with __GFP_HARDWALL bit
 * _not_ set if it's a GFP_KERNEL allocation, and all nodes in the
 * current tasks mems_allowed came up empty on the first pass over
 * the zonelist.  So only GFP_KERNEL allocations, if all nodes in the
 * cpuset are short of memory, might require taking the callback_mutex
 * mutex.
 * cpuset are short of memory, might require taking the callback_lock.
 *
 * The first call here from mm/page_alloc:get_page_from_freelist()
 * has __GFP_HARDWALL set in gfp_mask, enforcing hardwall cpusets,
@@ -2505,20 +2500,15 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
 *	TIF_MEMDIE   - any node ok
 *	GFP_KERNEL   - any node in enclosing hardwalled cpuset ok
 *	GFP_USER     - only nodes in current tasks mems allowed ok.
 *
 * Rule:
 *    Don't call cpuset_node_allowed_softwall if you can't sleep, unless you
 *    pass in the __GFP_HARDWALL flag set in gfp_flag, which disables
 *    the code that might scan up ancestor cpusets and sleep.
 */
int __cpuset_node_allowed_softwall(int node, gfp_t gfp_mask)
int __cpuset_node_allowed(int node, gfp_t gfp_mask)
{
	struct cpuset *cs;		/* current cpuset ancestors */
	int allowed;			/* is allocation in zone z allowed? */
	unsigned long flags;

	if (in_interrupt() || (gfp_mask & __GFP_THISNODE))
		return 1;
	might_sleep_if(!(gfp_mask & __GFP_HARDWALL));
	if (node_isset(node, current->mems_allowed))
		return 1;
	/*
@@ -2534,55 +2524,17 @@ int __cpuset_node_allowed_softwall(int node, gfp_t gfp_mask)
		return 1;

	/* Not hardwall and node outside mems_allowed: scan up cpusets */
	mutex_lock(&callback_mutex);
	spin_lock_irqsave(&callback_lock, flags);

	rcu_read_lock();
	cs = nearest_hardwall_ancestor(task_cs(current));
	allowed = node_isset(node, cs->mems_allowed);
	rcu_read_unlock();

	mutex_unlock(&callback_mutex);
	spin_unlock_irqrestore(&callback_lock, flags);
	return allowed;
}

/*
 * cpuset_node_allowed_hardwall - Can we allocate on a memory node?
 * @node: is this an allowed node?
 * @gfp_mask: memory allocation flags
 *
 * If we're in interrupt, yes, we can always allocate.  If __GFP_THISNODE is
 * set, yes, we can always allocate.  If node is in our task's mems_allowed,
 * yes.  If the task has been OOM killed and has access to memory reserves as
 * specified by the TIF_MEMDIE flag, yes.
 * Otherwise, no.
 *
 * The __GFP_THISNODE placement logic is really handled elsewhere,
 * by forcibly using a zonelist starting at a specified node, and by
 * (in get_page_from_freelist()) refusing to consider the zones for
 * any node on the zonelist except the first.  By the time any such
 * calls get to this routine, we should just shut up and say 'yes'.
 *
 * Unlike the cpuset_node_allowed_softwall() variant, above,
 * this variant requires that the node be in the current task's
 * mems_allowed or that we're in interrupt.  It does not scan up the
 * cpuset hierarchy for the nearest enclosing mem_exclusive cpuset.
 * It never sleeps.
 */
int __cpuset_node_allowed_hardwall(int node, gfp_t gfp_mask)
{
	if (in_interrupt() || (gfp_mask & __GFP_THISNODE))
		return 1;
	if (node_isset(node, current->mems_allowed))
		return 1;
	/*
	 * Allow tasks that have access to memory reserves because they have
	 * been OOM killed to get memory anywhere.
	 */
	if (unlikely(test_thread_flag(TIF_MEMDIE)))
		return 1;
	return 0;
}

/**
 * cpuset_mem_spread_node() - On which node to begin search for a file page
 * cpuset_slab_spread_node() - On which node to begin search for a slab page
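
(Illustration, not part of the diff: the callback_mutex -> callback_lock conversion lets readers such as __cpuset_node_allowed() take the lock from non-sleeping allocation context. writer_side/reader_side below are hypothetical names summarizing the pattern the diff applies throughout kernel/cpuset.c.)

#include <linux/mutex.h>
#include <linux/spinlock.h>

static DEFINE_MUTEX(example_cpuset_mutex);	/* serializes cpuset modifications */
static DEFINE_SPINLOCK(example_callback_lock);	/* guards reads of cpuset masks */

static void writer_side(void)
{
	mutex_lock(&example_cpuset_mutex);	/* one writer at a time, may sleep */
	spin_lock_irq(&example_callback_lock);	/* exclude readers while updating */
	/* ... update cpus_allowed / mems_allowed / effective_* ... */
	spin_unlock_irq(&example_callback_lock);
	mutex_unlock(&example_cpuset_mutex);
}

static void reader_side(void)
{
	unsigned long flags;

	spin_lock_irqsave(&example_callback_lock, flags);	/* no sleeping needed */
	/* ... read effective_cpus / mems_allowed consistently ... */
	spin_unlock_irqrestore(&example_callback_lock, flags);
}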
mm/hugetlb.c  +1 −1
@@ -582,7 +582,7 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,

	for_each_zone_zonelist_nodemask(zone, z, zonelist,
						MAX_NR_ZONES - 1, nodemask) {
		if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask(h))) {
		if (cpuset_zone_allowed(zone, htlb_alloc_mask(h))) {
			page = dequeue_huge_page_node(h, zone_to_nid(zone));
			if (page) {
				if (avoid_reserve)