Merge branch 'sched/cpuset' into sched/urgent (291c54ff) · Commits · e / devices / android_kernel_fairphone_FP3

include/linux/cpuset.h

+1 −1

Original line number	Diff line number	Diff line
		@@ -160,7 +160,7 @@ static inline int current_cpuset_is_being_rebound(void)

		static inline void rebuild_sched_domains(void)
		{
		partition_sched_domains(0, NULL, NULL);
		partition_sched_domains(1, NULL, NULL);
		}

		#endif /* !CONFIG_CPUSETS */

kernel/cpuset.c

+182 −130

Original line number	Diff line number	Diff line
		@@ -14,6 +14,8 @@
		* 2003-10-22 Updates by Stephen Hemminger.
		* 2004 May-July Rework by Paul Jackson.
		* 2006 Rework by Paul Menage to use generic cgroups
		* 2008 Rework of the scheduler domains and CPU hotplug handling
		* by Max Krasnyansky
		*
		* This file is subject to the terms and conditions of the GNU General Public
		* License. See the file COPYING in the main directory of the Linux
		@@ -236,9 +238,11 @@ static struct cpuset top_cpuset = {

		static DEFINE_MUTEX(callback_mutex);

		/* This is ugly, but preserves the userspace API for existing cpuset
		/*
		* This is ugly, but preserves the userspace API for existing cpuset
		* users. If someone tries to mount the "cpuset" filesystem, we
		* silently switch it to mount "cgroup" instead */
		* silently switch it to mount "cgroup" instead
		*/
		static int cpuset_get_sb(struct file_system_type *fs_type,
		int flags, const char *unused_dev_name,
		void *data, struct vfsmount *mnt)
		@@ -473,10 +477,9 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
		}

		/*
		* Helper routine for rebuild_sched_domains().
		* Helper routine for generate_sched_domains().
		* Do cpusets a, b have overlapping cpus_allowed masks?
		*/

		static int cpusets_overlap(struct cpuset *a, struct cpuset *b)
		{
		return cpus_intersects(a->cpus_allowed, b->cpus_allowed);
		@@ -518,26 +521,15 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c)
		}

		/*
		* rebuild_sched_domains()
		*
		* This routine will be called to rebuild the scheduler's dynamic
		* sched domains:
		* - if the flag 'sched_load_balance' of any cpuset with non-empty
		* 'cpus' changes,
		* - or if the 'cpus' allowed changes in any cpuset which has that
		* flag enabled,
		* - or if the 'sched_relax_domain_level' of any cpuset which has
		* that flag enabled and with non-empty 'cpus' changes,
		* - or if any cpuset with non-empty 'cpus' is removed,
		* - or if a cpu gets offlined.
		*
		* This routine builds a partial partition of the system's CPUs
		* (the set of non-overlapping cpumask_t's in the array 'part'
		* below), and passes that partial partition to the kernel/sched.c
		* partition_sched_domains() routine, which will rebuild the
		* schedulers load balancing domains (sched domains) as specified
		* by that partial partition. A 'partial partition' is a set of
		* non-overlapping subsets whose union is a subset of that set.
		* generate_sched_domains()
		*
		* This function builds a partial partition of the system's CPUs.
		* A 'partial partition' is a set of non-overlapping subsets whose
		* union is a subset of that set.
		* The output of this function needs to be passed to kernel/sched.c
		* partition_sched_domains() routine, which will rebuild the scheduler's
		* load balancing domains (sched domains) as specified by that partial
		* partition.
		*
		* See "What is sched_load_balance" in Documentation/cpusets.txt
		* for a background explanation of this.
		@@ -547,13 +539,7 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c)
		* domains when operating in the severe memory shortage situations
		* that could cause allocation failures below.
		*
		* Call with cgroup_mutex held. May take callback_mutex during
		* call due to the kfifo_alloc() and kmalloc() calls. May nest
		* a call to the get_online_cpus()/put_online_cpus() pair.
		* Must not be called holding callback_mutex, because we must not
		* call get_online_cpus() while holding callback_mutex. Elsewhere
		* the kernel nests callback_mutex inside get_online_cpus() calls.
		* So the reverse nesting would risk an ABBA deadlock.
		* Must be called with cgroup_lock held.
		*
		* The three key local variables below are:
		* q - a linked-list queue of cpuset pointers, used to implement a
		@@ -588,8 +574,8 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c)
		* element of the partition (one sched domain) to be passed to
		* partition_sched_domains().
		*/

		void rebuild_sched_domains(void)
		static int generate_sched_domains(cpumask_t **domains,
		struct sched_domain_attr **attributes)
		{
		LIST_HEAD(q); /* queue of cpusets to be scanned */
		struct cpuset *cp; /* scans q */
		@@ -601,23 +587,26 @@ void rebuild_sched_domains(void)
		int ndoms; /* number of sched domains in result */
		int nslot; /* next empty doms[] cpumask_t slot */

		csa = NULL;
		ndoms = 0;
		doms = NULL;
		dattr = NULL;
		csa = NULL;

		/* Special case for the 99% of systems with one, full, sched domain */
		if (is_sched_load_balance(&top_cpuset)) {
		ndoms = 1;
		doms = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
		if (!doms)
		goto rebuild;
		goto done;

		dattr = kmalloc(sizeof(struct sched_domain_attr), GFP_KERNEL);
		if (dattr) {
		*dattr = SD_ATTR_INIT;
		update_domain_attr_tree(dattr, &top_cpuset);
		}
		*doms = top_cpuset.cpus_allowed;
		goto rebuild;

		ndoms = 1;
		goto done;
		}

		csa = kmalloc(number_of_cpusets * sizeof(cp), GFP_KERNEL);
		@@ -680,18 +669,33 @@ void rebuild_sched_domains(void)
		}
		}

		/* Convert <csn, csa> to <ndoms, doms> */
		/*
		* Now we know how many domains to create.
		* Convert <csn, csa> to <ndoms, doms> and populate cpu masks.
		*/
		doms = kmalloc(ndoms * sizeof(cpumask_t), GFP_KERNEL);
		if (!doms)
		goto rebuild;
		if (!doms) {
		ndoms = 0;
		goto done;
		}

		/*
		* The rest of the code, including the scheduler, can deal with
		* dattr==NULL case. No need to abort if alloc fails.
		*/
		dattr = kmalloc(ndoms * sizeof(struct sched_domain_attr), GFP_KERNEL);

		for (nslot = 0, i = 0; i < csn; i++) {
		struct cpuset *a = csa[i];
		cpumask_t *dp;
		int apn = a->pn;

		if (apn >= 0) {
		cpumask_t *dp = doms + nslot;
		if (apn < 0) {
		/* Skip completed partitions */
		continue;
		}

		dp = doms + nslot;

		if (nslot == ndoms) {
		static int warnings = 10;
		@@ -714,27 +718,92 @@ void rebuild_sched_domains(void)

		if (apn == b->pn) {
		cpus_or(*dp, *dp, b->cpus_allowed);
		b->pn = -1;
		if (dattr)
		update_domain_attr_tree(dattr
		+ nslot, b);
		update_domain_attr_tree(dattr + nslot, b);

		/* Done with this partition */
		b->pn = -1;
		}
		}
		nslot++;
		}
		}
		BUG_ON(nslot != ndoms);

		rebuild:
		/* Have scheduler rebuild sched domains */
		done:
		kfree(csa);

		*domains = doms;
		*attributes = dattr;
		return ndoms;
		}

		/*
		* Rebuild scheduler domains.
		*
		* Call with neither cgroup_mutex held nor within get_online_cpus().
		* Takes both cgroup_mutex and get_online_cpus().
		*
		* Cannot be directly called from cpuset code handling changes
		* to the cpuset pseudo-filesystem, because it cannot be called
		* from code that already holds cgroup_mutex.
		*/
		static void do_rebuild_sched_domains(struct work_struct *unused)
		{
		struct sched_domain_attr *attr;
		cpumask_t *doms;
		int ndoms;

		get_online_cpus();
		partition_sched_domains(ndoms, doms, dattr);

		/* Generate domain masks and attrs */
		cgroup_lock();
		ndoms = generate_sched_domains(&doms, &attr);
		cgroup_unlock();

		/* Have scheduler rebuild the domains */
		partition_sched_domains(ndoms, doms, attr);

		put_online_cpus();
		}

		done:
		kfree(csa);
		/* Don't kfree(doms) -- partition_sched_domains() does that. */
		/* Don't kfree(dattr) -- partition_sched_domains() does that. */
		static DECLARE_WORK(rebuild_sched_domains_work, do_rebuild_sched_domains);

		/*
		* Rebuild scheduler domains, asynchronously via workqueue.
		*
		* If the flag 'sched_load_balance' of any cpuset with non-empty
		* 'cpus' changes, or if the 'cpus' allowed changes in any cpuset
		* which has that flag enabled, or if any cpuset with a non-empty
		* 'cpus' is removed, then call this routine to rebuild the
		* scheduler's dynamic sched domains.
		*
		* The rebuild_sched_domains() and partition_sched_domains()
		* routines must nest cgroup_lock() inside get_online_cpus(),
		* but such cpuset changes as these must nest that locking the
		* other way, holding cgroup_lock() for much of the code.
		*
		* So in order to avoid an ABBA deadlock, the cpuset code handling
		* these user changes delegates the actual sched domain rebuilding
		* to a separate workqueue thread, which ends up processing the
		* above do_rebuild_sched_domains() function.
		*/
		static void async_rebuild_sched_domains(void)
		{
		/*
		* Queue the statically declared work item whose handler is
		* do_rebuild_sched_domains(); the rebuild itself happens later
		* from the workqueue rather than in the caller's context.
		*/
		schedule_work(&rebuild_sched_domains_work);
		}

		/*
		* Accomplishes the same scheduler domain rebuild as the above
		* async_rebuild_sched_domains(), however it directly calls the
		* rebuild routine synchronously rather than calling it via an
		* asynchronous work thread.
		*
		* This can only be called from code that is not holding
		* cgroup_mutex (not nested in a cgroup_lock() call.)
		*/
		void rebuild_sched_domains(void)
		{
		/*
		* Run the work handler directly. Its work_struct parameter is
		* unused, so NULL is passed.
		*/
		do_rebuild_sched_domains(NULL);
		}

		/**
		@@ -863,7 +932,7 @@ static int update_cpumask(struct cpuset *cs, const char *buf)
		return retval;

		if (is_load_balanced)
		rebuild_sched_domains();
		async_rebuild_sched_domains();
		return 0;
		}

		@@ -1090,7 +1159,7 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val)
		if (val != cs->relax_domain_level) {
		cs->relax_domain_level = val;
		if (!cpus_empty(cs->cpus_allowed) && is_sched_load_balance(cs))
		rebuild_sched_domains();
		async_rebuild_sched_domains();
		}

		return 0;
		@@ -1131,7 +1200,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
		mutex_unlock(&callback_mutex);

		if (cpus_nonempty && balance_flag_changed)
		rebuild_sched_domains();
		async_rebuild_sched_domains();

		return 0;
		}
		@@ -1492,6 +1561,9 @@ static u64 cpuset_read_u64(struct cgroup *cont, struct cftype *cft)
		default:
		BUG();
		}

		/* Unreachable but makes gcc happy */
		return 0;
		}

		static s64 cpuset_read_s64(struct cgroup *cont, struct cftype *cft)
		@@ -1504,6 +1576,9 @@ static s64 cpuset_read_s64(struct cgroup *cont, struct cftype *cft)
		default:
		BUG();
		}

		/* Unreachable but makes gcc happy */
		return 0;
		}


		@@ -1692,15 +1767,9 @@ static struct cgroup_subsys_state *cpuset_create(
		}

		/*
		* Locking note on the strange update_flag() call below:
		*
		* If the cpuset being removed has its flag 'sched_load_balance'
		* enabled, then simulate turning sched_load_balance off, which
		* will call rebuild_sched_domains(). The get_online_cpus()
		* call in rebuild_sched_domains() must not be made while holding
		* callback_mutex. Elsewhere the kernel nests callback_mutex inside
		* get_online_cpus() calls. So the reverse nesting would risk an
		* ABBA deadlock.
		* will call async_rebuild_sched_domains().
		*/

		static void cpuset_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
		@@ -1811,7 +1880,7 @@ static void move_member_tasks_to_cpuset(struct cpuset *from, struct cpuset *to)
		}

		/*
		* If common_cpu_mem_hotplug_unplug(), below, unplugs any CPUs
		* If CPU and/or memory hotplug handlers, below, unplug any CPUs
		* or memory nodes, we need to walk over the cpuset hierarchy,
		* removing that CPU or node from all cpusets. If this removes the
		* last CPU or node from a cpuset, then move the tasks in the empty
		@@ -1902,35 +1971,6 @@ static void scan_for_empty_cpusets(const struct cpuset *root)
		}
		}

		/*
		* The cpus_allowed and mems_allowed nodemasks in the top_cpuset track
		* cpu_online_map and node_states[N_HIGH_MEMORY]. Force the top cpuset to
		* track what's online after any CPU or memory node hotplug or unplug event.
		*
		* Since there are two callers of this routine, one for CPU hotplug
		* events and one for memory node hotplug events, we could have coded
		* two separate routines here. We code it as a single common routine
		* in order to minimize text size.
		*/

		static void common_cpu_mem_hotplug_unplug(int rebuild_sd)
		{
		/* rebuild_sd: non-zero to also rebuild the sched domains below. */
		cgroup_lock();

		/* Resync the top cpuset with the currently online CPUs and memory nodes. */
		top_cpuset.cpus_allowed = cpu_online_map;
		top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
		/* Scan the hierarchy starting at the top cpuset for cpusets left empty. */
		scan_for_empty_cpusets(&top_cpuset);

		/*
		* Scheduler destroys domains on hotplug events.
		* Rebuild them based on the current settings.
		*/
		if (rebuild_sd)
		rebuild_sched_domains();

		/* Drop the lock taken at entry; the rebuild above ran with it held. */
		cgroup_unlock();
		}

		/*
		* The top_cpuset tracks what CPUs and Memory Nodes are online,
		* period. This is necessary in order to make cpusets transparent
		@@ -1939,40 +1979,52 @@ static void common_cpu_mem_hotplug_unplug(int rebuild_sd)
		*
		* This routine ensures that top_cpuset.cpus_allowed tracks
		* cpu_online_map on each CPU hotplug (cpuhp) event.
		*
		* Called within get_online_cpus(). Needs to call cgroup_lock()
		* before calling generate_sched_domains().
		*/

		static int cpuset_handle_cpuhp(struct notifier_block *unused_nb,
		static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
		unsigned long phase, void *unused_cpu)
		{
		struct sched_domain_attr *attr;
		cpumask_t *doms;
		int ndoms;

		switch (phase) {
		case CPU_UP_CANCELED:
		case CPU_UP_CANCELED_FROZEN:
		case CPU_DOWN_FAILED:
		case CPU_DOWN_FAILED_FROZEN:
		case CPU_ONLINE:
		case CPU_ONLINE_FROZEN:
		case CPU_DEAD:
		case CPU_DEAD_FROZEN:
		common_cpu_mem_hotplug_unplug(1);
		break;

		default:
		return NOTIFY_DONE;
		}

		cgroup_lock();
		top_cpuset.cpus_allowed = cpu_online_map;
		scan_for_empty_cpusets(&top_cpuset);
		ndoms = generate_sched_domains(&doms, &attr);
		cgroup_unlock();

		/* Have scheduler rebuild the domains */
		partition_sched_domains(ndoms, doms, attr);

		return NOTIFY_OK;
		}

		#ifdef CONFIG_MEMORY_HOTPLUG
		/*
		* Keep top_cpuset.mems_allowed tracking node_states[N_HIGH_MEMORY].
		* Call this routine anytime after you change
		* node_states[N_HIGH_MEMORY].
		* See also the previous routine cpuset_handle_cpuhp().
		* Call this routine anytime after node_states[N_HIGH_MEMORY] changes.
		* See also the previous routine cpuset_track_online_cpus().
		*/

		void cpuset_track_online_nodes(void)
		{
		common_cpu_mem_hotplug_unplug(0);
		cgroup_lock();
		top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
		scan_for_empty_cpusets(&top_cpuset);
		cgroup_unlock();
		}
		#endif

		@@ -1987,7 +2039,7 @@ void __init cpuset_init_smp(void)
		top_cpuset.cpus_allowed = cpu_online_map;
		top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];

		hotcpu_notifier(cpuset_handle_cpuhp, 0);
		hotcpu_notifier(cpuset_track_online_cpus, 0);
		}

		/**

kernel/sched.c

+13 −6

Original line number	Diff line number	Diff line
		@@ -7696,24 +7696,27 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur,
		* and partition_sched_domains() will fallback to the single partition
		* 'fallback_doms', it also forces the domains to be rebuilt.
		*
		* If doms_new==NULL it will be replaced with cpu_online_map.
		* ndoms_new==0 is a special case for destroying existing domains.
		* It will not create the default domain.
		*
		* Call with hotplug lock held
		*/
		void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
		struct sched_domain_attr *dattr_new)
		{
		int i, j;
		int i, j, n;

		mutex_lock(&sched_domains_mutex);

		/* always unregister in case we don't destroy any domains */
		unregister_sched_domain_sysctl();

		if (doms_new == NULL)
		ndoms_new = 0;
		n = doms_new ? ndoms_new : 0;

		/* Destroy deleted domains */
		for (i = 0; i < ndoms_cur; i++) {
		for (j = 0; j < ndoms_new; j++) {
		for (j = 0; j < n; j++) {
		if (cpus_equal(doms_cur[i], doms_new[j])
		&& dattrs_equal(dattr_cur, i, dattr_new, j))
		goto match1;
		@@ -7726,7 +7729,6 @@ void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,

		if (doms_new == NULL) {
		ndoms_cur = 0;
		ndoms_new = 1;
		doms_new = &fallback_doms;
		cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map);
		dattr_new = NULL;
		@@ -7763,8 +7765,13 @@ void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
		int arch_reinit_sched_domains(void)
		{
		get_online_cpus();

		/* Destroy domains first to force the rebuild */
		partition_sched_domains(0, NULL, NULL);

		rebuild_sched_domains();
		put_online_cpus();

		return 0;
		}

		@@ -7848,7 +7855,7 @@ static int update_sched_domains(struct notifier_block *nfb,
		case CPU_ONLINE_FROZEN:
		case CPU_DEAD:
		case CPU_DEAD_FROZEN:
		partition_sched_domains(0, NULL, NULL);
		partition_sched_domains(1, NULL, NULL);
		return NOTIFY_OK;

		default: