
Commit df7c8e84 authored by Rusty Russell, committed by Ingo Molnar

cpumask: remove cpumask allocation from idle_balance



Impact: fix circular locking

Steven reports a circular locking dependency caused by alloc_cpumask_var()
doing a wakeup. We get rid of this with the tried-and-true technique of
using a per-cpu cpumask_var_t rather than allocating one on every call.

Simpler and more robust than a rare, implicit allocation within
an atomic codepath.

Reported-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
LKML-Reference: <alpine.DEB.2.00.0903181729360.31583@gandalf.stny.rr.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent c38da569
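
The diff below swaps a per-call cpumask allocation for a statically defined
per-cpu scratch mask that each balancing path simply reuses. As a rough
illustration of that pattern only, here is a minimal userspace C analogy;
every name in it (scratch_mask, balance_old, balance_new, NR_CPUS,
MASK_BYTES) is hypothetical, standing in for the kernel's DEFINE_PER_CPU(),
__get_cpu_var() and cpumask_setall():

/* Userspace analogy of the per-cpu scratch-buffer pattern used in this
 * commit. All names are hypothetical stand-ins for the kernel APIs. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define NR_CPUS     4
#define MASK_BYTES 16                            /* stand-in for cpumask_size() */

/* One preallocated scratch mask per CPU, defined once at start-of-day. */
static unsigned char scratch_mask[NR_CPUS][MASK_BYTES];

/* Old shape: allocate and free a temporary mask on every call. */
static int balance_old(void)
{
	unsigned char *tmp = malloc(MASK_BYTES); /* can fail; in-kernel it could wake someone */
	if (!tmp)
		return 0;
	memset(tmp, 0xff, MASK_BYTES);           /* cpumask_setall() analogue */
	/* ... balancing work using tmp ... */
	free(tmp);
	return 1;
}

/* New shape: grab this CPU's preallocated mask; no allocation, no failure path. */
static int balance_new(int cpu)
{
	unsigned char *cpus = scratch_mask[cpu]; /* __get_cpu_var() analogue */
	memset(cpus, 0xff, MASK_BYTES);
	/* ... balancing work using cpus ... */
	return 1;
}

int main(void)
{
	printf("old: %d, new: %d\n", balance_old(), balance_new(1));
	return 0;
}

A single scratch mask per CPU is safe here because the balancing paths run
on the owning CPU with preemption disabled (softirq context, or with the
runqueue lock held), so the mask is never used concurrently.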
kernel/sched.c  +18 −17
@@ -3448,19 +3448,23 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
  */
 #define MAX_PINNED_INTERVAL	512
 
+/* Working cpumask for load_balance and load_balance_newidle. */
+static DEFINE_PER_CPU(cpumask_var_t, load_balance_tmpmask);
+
 /*
  * Check this_cpu to ensure it is balanced within domain. Attempt to move
  * tasks if there is an imbalance.
  */
 static int load_balance(int this_cpu, struct rq *this_rq,
 			struct sched_domain *sd, enum cpu_idle_type idle,
-			int *balance, struct cpumask *cpus)
+			int *balance)
 {
 	int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;
 	struct sched_group *group;
 	unsigned long imbalance;
 	struct rq *busiest;
 	unsigned long flags;
+	struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
 
 	cpumask_setall(cpus);
 
@@ -3615,8 +3619,7 @@ out:
  * this_rq is locked.
  */
 static int
-load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd,
-			struct cpumask *cpus)
+load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
 {
 	struct sched_group *group;
 	struct rq *busiest = NULL;
@@ -3624,6 +3627,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd,
 	int ld_moved = 0;
 	int sd_idle = 0;
 	int all_pinned = 0;
+	struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
 
 	cpumask_setall(cpus);
 
@@ -3764,10 +3768,6 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
 	struct sched_domain *sd;
 	int pulled_task = 0;
 	unsigned long next_balance = jiffies + HZ;
-	cpumask_var_t tmpmask;
-
-	if (!alloc_cpumask_var(&tmpmask, GFP_ATOMIC))
-		return;
 
 	for_each_domain(this_cpu, sd) {
 		unsigned long interval;
@@ -3778,7 +3778,7 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
 		if (sd->flags & SD_BALANCE_NEWIDLE)
 			/* If we've pulled tasks over stop searching: */
 			pulled_task = load_balance_newidle(this_cpu, this_rq,
-							   sd, tmpmask);
+							   sd);
 
 		interval = msecs_to_jiffies(sd->balance_interval);
 		if (time_after(next_balance, sd->last_balance + interval))
@@ -3793,7 +3793,6 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
 		 */
 		this_rq->next_balance = next_balance;
 	}
-	free_cpumask_var(tmpmask);
 }
 
 /*
@@ -3943,11 +3942,6 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
 	unsigned long next_balance = jiffies + 60*HZ;
 	int update_next_balance = 0;
 	int need_serialize;
-	cpumask_var_t tmp;
-
-	/* Fails alloc?  Rebalancing probably not a priority right now. */
-	if (!alloc_cpumask_var(&tmp, GFP_ATOMIC))
-		return;
 
 	for_each_domain(cpu, sd) {
 		if (!(sd->flags & SD_LOAD_BALANCE))
@@ -3972,7 +3966,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
 		}
 
 		if (time_after_eq(jiffies, sd->last_balance + interval)) {
-			if (load_balance(cpu, rq, sd, idle, &balance, tmp)) {
+			if (load_balance(cpu, rq, sd, idle, &balance)) {
 				/*
 				 * We've pulled tasks over so either we're no
 				 * longer idle, or one of our SMT siblings is
@@ -4006,8 +4000,6 @@ out:
 	 */
 	if (likely(update_next_balance))
 		rq->next_balance = next_balance;
-
-	free_cpumask_var(tmp);
 }
 
 /*
@@ -8303,6 +8295,9 @@ void __init sched_init(void)
 #endif
 #ifdef CONFIG_USER_SCHED
 	alloc_size *= 2;
+#endif
+#ifdef CONFIG_CPUMASK_OFFSTACK
+	alloc_size *= num_possible_cpus() * cpumask_size();
 #endif
 	/*
 	 * As sched_init() is called before page_alloc is setup,
@@ -8341,6 +8336,12 @@ void __init sched_init(void)
 		ptr += nr_cpu_ids * sizeof(void **);
 #endif /* CONFIG_USER_SCHED */
 #endif /* CONFIG_RT_GROUP_SCHED */
+#ifdef CONFIG_CPUMASK_OFFSTACK
+		for_each_possible_cpu(i) {
+			per_cpu(load_balance_tmpmask, i) = (void *)ptr;
+			ptr += cpumask_size();
+		}
+#endif /* CONFIG_CPUMASK_OFFSTACK */
 	}
 
 #ifdef CONFIG_SMP
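
The two sched_init() hunks above scale the single boot-time allocation to
cover a cpumask per possible CPU when CONFIG_CPUMASK_OFFSTACK is set, then
walk ptr to hand each CPU its slice of that block. A minimal userspace
sketch of the same carve-out idea, again with hypothetical names and sizes
(cpu_scratch, NR_CPUS, MASK_BYTES) rather than the kernel's per_cpu() and
cpumask_size():

#include <stdio.h>
#include <stdlib.h>

#define NR_CPUS     8
#define MASK_BYTES 16                            /* stand-in for cpumask_size() */

static unsigned char *cpu_scratch[NR_CPUS];      /* stand-in for the per-cpu pointer */

int main(void)
{
	/* One allocation sized to hold a scratch mask for every possible CPU. */
	size_t alloc_size = (size_t)NR_CPUS * MASK_BYTES;
	unsigned char *ptr = calloc(1, alloc_size);

	if (!ptr)
		return 1;

	/* Carve a slice per CPU, mirroring the for_each_possible_cpu() loop. */
	for (int i = 0; i < NR_CPUS; i++) {
		cpu_scratch[i] = ptr;
		ptr += MASK_BYTES;
	}

	printf("cpu 0 mask at %p, cpu 1 mask at %p\n",
	       (void *)cpu_scratch[0], (void *)cpu_scratch[1]);
	return 0;
}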