
Commit c96d145e authored by Kenneth W Chen, committed by Linus Torvalds

[PATCH] sched: fix smt nice lock contention and optimization

Initial report and lock contention fix from Chris Mason:

Recent benchmarks showed some performance regressions between 2.6.16 and
2.6.5.  We tracked down one of the regressions to lock contention in
schedule-heavy workloads (~70,000 context switches per second).

kernel/sched.c:dependent_sleeper() was responsible for most of the lock
contention, hammering on the run queue locks.  The patch below is more of a
discussion point than a suggested fix (although it does reduce lock
contention significantly).  The dependent_sleeper code looks very expensive
to me, especially for using a spinlock to bounce control between two
different siblings in the same cpu.
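
For context, a minimal user-space model of the locking shape the old code used (it corresponds to the lines removed from wake_sleeping_dependent()/dependent_sleeper() in the diff below): drop this CPU's runqueue lock, then take every sibling runqueue lock in CPU order before doing the SMT-nice checks.  The names rq_model and NCPUS and the pthread mutexes are stand-ins invented for this sketch; the real code uses per-runqueue spinlocks with interrupts disabled.

/* Build: gcc -O2 -pthread old_smt_locking.c -o old_smt_locking */
#include <pthread.h>
#include <stdio.h>

#define NCPUS 4				/* pretend 4 siblings share a package */

struct rq_model {
	pthread_mutex_t lock;		/* stands in for runqueue->lock */
	int nr_running;
};

static struct rq_model rqs[NCPUS];

/* Pre-patch shape: unlock our own runqueue, lock ALL sibling runqueues
 * in CPU order, do the check, then unlock everything but our own. */
static void dependent_sleeper_old(int this_cpu)
{
	int i;

	pthread_mutex_unlock(&rqs[this_cpu].lock);	/* drop own lock */

	for (i = 0; i < NCPUS; i++)			/* lock every sibling */
		pthread_mutex_lock(&rqs[i].lock);

	/* ... priority/timeslice comparison against each sibling ... */

	for (i = 0; i < NCPUS; i++)
		if (i != this_cpu)
			pthread_mutex_unlock(&rqs[i].lock);
	/* exits with this_cpu's lock re-held, as the old code did */
}

int main(void)
{
	int i;

	for (i = 0; i < NCPUS; i++)
		pthread_mutex_init(&rqs[i].lock, NULL);

	pthread_mutex_lock(&rqs[0].lock);	/* schedule() holds its own rq lock */
	dependent_sleeper_old(0);		/* every pass bounces all sibling locks */
	pthread_mutex_unlock(&rqs[0].lock);

	puts("old shape: all sibling runqueue locks taken per schedule()");
	return 0;
}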

It is further optimized (a user-space sketch of the resulting trylock shape follows the list):

* perform dependent_sleeper check after next task is determined
* convert wake_sleeping_dependent to use trylock
* skip smt runqueue check if trylock fails
* optimize double_rq_lock now that smt nice is converted to trylock
* early exit in searching first SD_SHARE_CPUPOWER domain
* speedup fast path of dependent_sleeper
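
A user-space sketch of the trylock shape the patch moves to (matching the new dependent_sleeper()/wake_sleeping_dependent() bodies in the diff below): this_rq's lock is never dropped, each sibling runqueue is only trylocked, and a busy sibling is simply skipped, so the normal lock-ordering rule no longer has to be obeyed.  As above, rq_model, NCPUS and the pthread mutexes are invented stand-ins for the runqueue spinlocks.

/* Build: gcc -O2 -pthread smt_trylock.c -o smt_trylock */
#include <pthread.h>
#include <stdio.h>

#define NCPUS 4

struct rq_model {
	pthread_mutex_t lock;
	int nr_running;
};

static struct rq_model rqs[NCPUS];

/* Post-patch shape: called with rqs[this_cpu].lock already held. */
static int dependent_sleeper_new(int this_cpu)
{
	int i, ret = 0;

	for (i = 0; i < NCPUS; i++) {
		if (i == this_cpu)
			continue;
		if (pthread_mutex_trylock(&rqs[i].lock) != 0)
			continue;		/* sibling busy: just skip it */

		/* ... SMT-nice check against the sibling's current task ... */

		pthread_mutex_unlock(&rqs[i].lock);
	}
	return ret;				/* own lock was never dropped */
}

int main(void)
{
	int i;

	for (i = 0; i < NCPUS; i++)
		pthread_mutex_init(&rqs[i].lock, NULL);

	pthread_mutex_lock(&rqs[0].lock);
	printf("dependent_sleeper_new -> %d\n", dependent_sleeper_new(0));
	pthread_mutex_unlock(&rqs[0].lock);
	return 0;
}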

[akpm@osdl.org: cleanup]
Signed-off-by: Ken Chen <kenneth.w.chen@intel.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Con Kolivas <kernel@kolivas.org>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Acked-by: Chris Mason <mason@suse.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 7a8e2a5e
kernel/sched.c  +59 −123
@@ -239,7 +239,6 @@ struct runqueue {
 
 	task_t *migration_thread;
 	struct list_head migration_queue;
-	int cpu;
 #endif
 
 #ifdef CONFIG_SCHEDSTATS
@@ -1074,9 +1073,10 @@ static int sched_balance_self(int cpu, int flag)
 	struct task_struct *t = current;
 	struct sched_domain *tmp, *sd = NULL;
 
-	for_each_domain(cpu, tmp)
+	for_each_domain(cpu, tmp) {
 		if (tmp->flags & flag)
 			sd = tmp;
+	}
 
 	while (sd) {
 		cpumask_t span;
@@ -1691,9 +1691,6 @@ unsigned long nr_active(void)
 /*
  * double_rq_lock - safely lock two runqueues
  *
- * We must take them in cpu order to match code in
- * dependent_sleeper and wake_dependent_sleeper.
- *
  * Note this does not disable interrupts like task_rq_lock,
  * you need to do so manually before calling.
  */
@@ -1705,7 +1702,7 @@ static void double_rq_lock(runqueue_t *rq1, runqueue_t *rq2)
 		spin_lock(&rq1->lock);
 		__acquire(rq2->lock);	/* Fake it out ;) */
 	} else {
-		if (rq1->cpu < rq2->cpu) {
+		if (rq1 < rq2) {
 			spin_lock(&rq1->lock);
 			spin_lock(&rq2->lock);
 		} else {
@@ -1741,7 +1738,7 @@ static void double_lock_balance(runqueue_t *this_rq, runqueue_t *busiest)
 	__acquires(this_rq->lock)
 {
 	if (unlikely(!spin_trylock(&busiest->lock))) {
-		if (busiest->cpu < this_rq->cpu) {
+		if (busiest < this_rq) {
 			spin_unlock(&this_rq->lock);
 			spin_lock(&busiest->lock);
 			spin_lock(&this_rq->lock);
@@ -2352,10 +2349,11 @@ static void active_load_balance(runqueue_t *busiest_rq, int busiest_cpu)
 	double_lock_balance(busiest_rq, target_rq);
 
 	/* Search for an sd spanning us and the target CPU. */
-	for_each_domain(target_cpu, sd)
+	for_each_domain(target_cpu, sd) {
 		if ((sd->flags & SD_LOAD_BALANCE) &&
 			cpu_isset(busiest_cpu, sd->span))
 				break;
+	}
 
 	if (unlikely(sd == NULL))
 		goto out;
@@ -2691,48 +2689,35 @@ static inline void wakeup_busy_runqueue(runqueue_t *rq)
 		resched_task(rq->idle);
 }
 
-static void wake_sleeping_dependent(int this_cpu, runqueue_t *this_rq)
+/*
+ * Called with interrupt disabled and this_rq's runqueue locked.
+ */
+static void wake_sleeping_dependent(int this_cpu)
 {
 	struct sched_domain *tmp, *sd = NULL;
-	cpumask_t sibling_map;
 	int i;
 
-	for_each_domain(this_cpu, tmp)
-		if (tmp->flags & SD_SHARE_CPUPOWER)
+	for_each_domain(this_cpu, tmp) {
+		if (tmp->flags & SD_SHARE_CPUPOWER) {
 			sd = tmp;
+			break;
+		}
+	}
 
 	if (!sd)
 		return;
 
-	/*
-	 * Unlock the current runqueue because we have to lock in
-	 * CPU order to avoid deadlocks. Caller knows that we might
-	 * unlock. We keep IRQs disabled.
-	 */
-	spin_unlock(&this_rq->lock);
-
-	sibling_map = sd->span;
-
-	for_each_cpu_mask(i, sibling_map)
-		spin_lock(&cpu_rq(i)->lock);
-	/*
-	 * We clear this CPU from the mask. This both simplifies the
-	 * inner loop and keps this_rq locked when we exit:
-	 */
-	cpu_clear(this_cpu, sibling_map);
-
-	for_each_cpu_mask(i, sibling_map) {
+	for_each_cpu_mask(i, sd->span) {
 		runqueue_t *smt_rq = cpu_rq(i);
 
+		if (i == this_cpu)
+			continue;
+		if (unlikely(!spin_trylock(&smt_rq->lock)))
+			continue;
+
 		wakeup_busy_runqueue(smt_rq);
+		spin_unlock(&smt_rq->lock);
 	}
-
-	for_each_cpu_mask(i, sibling_map)
-		spin_unlock(&cpu_rq(i)->lock);
-	/*
-	 * We exit with this_cpu's rq still held and IRQs
-	 * still disabled:
-	 */
 }
 
 /*
@@ -2745,52 +2730,46 @@ static inline unsigned long smt_slice(task_t *p, struct sched_domain *sd)
 	return p->time_slice * (100 - sd->per_cpu_gain) / 100;
 }
 
-static int dependent_sleeper(int this_cpu, runqueue_t *this_rq)
+/*
+ * To minimise lock contention and not have to drop this_rq's runlock we only
+ * trylock the sibling runqueues and bypass those runqueues if we fail to
+ * acquire their lock. As we only trylock the normal locking order does not
+ * need to be obeyed.
+ */
+static int dependent_sleeper(int this_cpu, runqueue_t *this_rq, task_t *p)
 {
 	struct sched_domain *tmp, *sd = NULL;
-	cpumask_t sibling_map;
-	prio_array_t *array;
 	int ret = 0, i;
-	task_t *p;
 
-	for_each_domain(this_cpu, tmp)
-		if (tmp->flags & SD_SHARE_CPUPOWER)
+	/* kernel/rt threads do not participate in dependent sleeping */
+	if (!p->mm || rt_task(p))
+		return 0;
+
+	for_each_domain(this_cpu, tmp) {
+		if (tmp->flags & SD_SHARE_CPUPOWER) {
 			sd = tmp;
+			break;
+		}
+	}
 
 	if (!sd)
 		return 0;
 
-	/*
-	 * The same locking rules and details apply as for
-	 * wake_sleeping_dependent():
-	 */
-	spin_unlock(&this_rq->lock);
-	sibling_map = sd->span;
-	for_each_cpu_mask(i, sibling_map)
-		spin_lock(&cpu_rq(i)->lock);
-	cpu_clear(this_cpu, sibling_map);
+	for_each_cpu_mask(i, sd->span) {
+		runqueue_t *smt_rq;
+		task_t *smt_curr;
 
-	/*
-	 * Establish next task to be run - it might have gone away because
-	 * we released the runqueue lock above:
-	 */
-	if (!this_rq->nr_running)
-		goto out_unlock;
-	array = this_rq->active;
-	if (!array->nr_active)
-		array = this_rq->expired;
-	BUG_ON(!array->nr_active);
+		if (i == this_cpu)
+			continue;
 
-	p = list_entry(array->queue[sched_find_first_bit(array->bitmap)].next,
-		task_t, run_list);
+		smt_rq = cpu_rq(i);
+		if (unlikely(!spin_trylock(&smt_rq->lock)))
+			continue;
 
-	for_each_cpu_mask(i, sibling_map) {
-		runqueue_t *smt_rq = cpu_rq(i);
-		task_t *smt_curr = smt_rq->curr;
+		smt_curr = smt_rq->curr;
 
-		/* Kernel threads do not participate in dependent sleeping */
-		if (!p->mm || !smt_curr->mm || rt_task(p))
-			goto check_smt_task;
+		if (!smt_curr->mm)
+			goto unlock;
 
 		/*
 		 * If a user task with lower static priority than the
@@ -2808,49 +2787,24 @@ static int dependent_sleeper(int this_cpu, runqueue_t *this_rq)
 			if ((jiffies % DEF_TIMESLICE) >
 				(sd->per_cpu_gain * DEF_TIMESLICE / 100))
 					ret = 1;
-		} else
+		} else {
 			if (smt_curr->static_prio < p->static_prio &&
 				!TASK_PREEMPTS_CURR(p, smt_rq) &&
 				smt_slice(smt_curr, sd) > task_timeslice(p))
 					ret = 1;
-
-check_smt_task:
-		if ((!smt_curr->mm && smt_curr != smt_rq->idle) ||
-			rt_task(smt_curr))
-				continue;
-		if (!p->mm) {
-			wakeup_busy_runqueue(smt_rq);
-			continue;
-		}
-
-		/*
-		 * Reschedule a lower priority task on the SMT sibling for
-		 * it to be put to sleep, or wake it up if it has been put to
-		 * sleep for priority reasons to see if it should run now.
-		 */
-		if (rt_task(p)) {
-			if ((jiffies % DEF_TIMESLICE) >
-				(sd->per_cpu_gain * DEF_TIMESLICE / 100))
-					resched_task(smt_curr);
-		} else {
-			if (TASK_PREEMPTS_CURR(p, smt_rq) &&
-				smt_slice(p, sd) > task_timeslice(smt_curr))
-					resched_task(smt_curr);
-			else
-				wakeup_busy_runqueue(smt_rq);
-		}
+		}
+unlock:
+		spin_unlock(&smt_rq->lock);
 	}
-out_unlock:
-	for_each_cpu_mask(i, sibling_map)
-		spin_unlock(&cpu_rq(i)->lock);
 	return ret;
 }
 #else
-static inline void wake_sleeping_dependent(int this_cpu, runqueue_t *this_rq)
+static inline void wake_sleeping_dependent(int this_cpu)
 {
 }
 
-static inline int dependent_sleeper(int this_cpu, runqueue_t *this_rq)
+static inline int dependent_sleeper(int this_cpu, runqueue_t *this_rq,
+					task_t *p)
 {
 	return 0;
 }
@@ -2972,32 +2926,13 @@ asmlinkage void __sched schedule(void)
 
 	cpu = smp_processor_id();
 	if (unlikely(!rq->nr_running)) {
-go_idle:
 		idle_balance(cpu, rq);
 		if (!rq->nr_running) {
 			next = rq->idle;
 			rq->expired_timestamp = 0;
-			wake_sleeping_dependent(cpu, rq);
-			/*
-			 * wake_sleeping_dependent() might have released
-			 * the runqueue, so break out if we got new
-			 * tasks meanwhile:
-			 */
-			if (!rq->nr_running)
-				goto switch_tasks;
-		}
-	} else {
-		if (dependent_sleeper(cpu, rq)) {
-			next = rq->idle;
+			wake_sleeping_dependent(cpu);
 			goto switch_tasks;
 		}
-		/*
-		 * dependent_sleeper() releases and reacquires the runqueue
-		 * lock, hence go into the idle loop if the rq went
-		 * empty meanwhile:
-		 */
-		if (unlikely(!rq->nr_running))
-			goto go_idle;
 	}
 
 	array = rq->active;
@@ -3035,6 +2970,8 @@ asmlinkage void __sched schedule(void)
 		}
 	}
 	next->sleep_type = SLEEP_NORMAL;
+	if (dependent_sleeper(cpu, rq, next))
+		next = rq->idle;
switch_tasks:
 	if (next == rq->idle)
 		schedstat_inc(rq, sched_goidle);
@@ -6144,7 +6081,6 @@ void __init sched_init(void)
 		rq->push_cpu = 0;
 		rq->migration_thread = NULL;
 		INIT_LIST_HEAD(&rq->migration_queue);
-		rq->cpu = i;
 #endif
 		atomic_set(&rq->nr_iowait, 0);