Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit f65eda4f authored by Steven Rostedt's avatar Steven Rostedt Committed by Ingo Molnar
Browse files

sched: pull RT tasks from overloaded runqueues



This patch adds the algorithm to pull tasks from RT overloaded runqueues.

When a pull RT is initiated, all overloaded runqueues are examined for
a RT task that is higher in prio than the highest prio task queued on the
target runqueue. If another runqueue holds a RT task that is of higher
prio than the highest prio task on the target runqueue is found it is pulled
to the target runqueue.

Signed-off-by: default avatarSteven Rostedt <srostedt@redhat.com>
Signed-off-by: default avatarIngo Molnar <mingo@elte.hu>
parent 4fd29176
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -3721,6 +3721,8 @@ asmlinkage void __sched schedule(void)
		switch_count = &prev->nvcsw;
	}

	schedule_balance_rt(rq, prev);

	if (unlikely(!rq->nr_running))
		idle_balance(cpu, rq);

+176 −11
Original line number Diff line number Diff line
@@ -179,8 +179,17 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
static int double_lock_balance(struct rq *this_rq, struct rq *busiest);
static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep);

static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
{
	if (!task_running(rq, p) &&
	    (cpu < 0 || cpu_isset(cpu, p->cpus_allowed)))
		return 1;
	return 0;
}

/* Return the second highest RT task, NULL otherwise */
static struct task_struct *pick_next_highest_task_rt(struct rq *rq)
static struct task_struct *pick_next_highest_task_rt(struct rq *rq,
						     int cpu)
{
	struct rt_prio_array *array = &rq->rt.active;
	struct task_struct *next;
@@ -199,26 +208,36 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq)
	}

	queue = array->queue + idx;
	BUG_ON(list_empty(queue));

	next = list_entry(queue->next, struct task_struct, run_list);
	if (unlikely(next != rq->curr))
		return next;
	if (unlikely(pick_rt_task(rq, next, cpu)))
		goto out;

	if (queue->next->next != queue) {
		/* same prio task */
		next = list_entry(queue->next->next, struct task_struct, run_list);
		return next;
		if (pick_rt_task(rq, next, cpu))
			goto out;
	}

 retry:
	/* slower, but more flexible */
	idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx+1);
	if (unlikely(idx >= MAX_RT_PRIO)) {
		WARN_ON(1); /* rt_nr_running was 2 and above! */
	if (unlikely(idx >= MAX_RT_PRIO))
		return NULL;
	}

	queue = array->queue + idx;
	next = list_entry(queue->next, struct task_struct, run_list);
	BUG_ON(list_empty(queue));

	list_for_each_entry(next, queue, run_list) {
		if (pick_rt_task(rq, next, cpu))
			goto out;
	}

	goto retry;

 out:
	return next;
}

@@ -305,13 +324,15 @@ static int push_rt_task(struct rq *this_rq)

	assert_spin_locked(&this_rq->lock);

	next_task = pick_next_highest_task_rt(this_rq);
	next_task = pick_next_highest_task_rt(this_rq, -1);
	if (!next_task)
		return 0;

 retry:
	if (unlikely(next_task == this_rq->curr))
	if (unlikely(next_task == this_rq->curr)) {
		WARN_ON(1);
		return 0;
	}

	/*
	 * It's possible that the next_task slipped in of
@@ -335,7 +356,7 @@ static int push_rt_task(struct rq *this_rq)
		 * so it is possible that next_task has changed.
		 * If it has, then try again.
		 */
		task = pick_next_highest_task_rt(this_rq);
		task = pick_next_highest_task_rt(this_rq, -1);
		if (unlikely(task != next_task) && task && paranoid--) {
			put_task_struct(next_task);
			next_task = task;
@@ -378,6 +399,149 @@ static void push_rt_tasks(struct rq *rq)
		;
}

static int pull_rt_task(struct rq *this_rq)
{
	struct task_struct *next;
	struct task_struct *p;
	struct rq *src_rq;
	cpumask_t *rto_cpumask;
	int this_cpu = this_rq->cpu;
	int cpu;
	int ret = 0;

	assert_spin_locked(&this_rq->lock);

	/*
	 * If cpusets are used, and we have overlapping
	 * run queue cpusets, then this algorithm may not catch all.
	 * This is just the price you pay on trying to keep
	 * dirtying caches down on large SMP machines.
	 */
	if (likely(!rt_overloaded()))
		return 0;

	next = pick_next_task_rt(this_rq);

	rto_cpumask = rt_overload();

	for_each_cpu_mask(cpu, *rto_cpumask) {
		if (this_cpu == cpu)
			continue;

		src_rq = cpu_rq(cpu);
		if (unlikely(src_rq->rt.rt_nr_running <= 1)) {
			/*
			 * It is possible that overlapping cpusets
			 * will miss clearing a non overloaded runqueue.
			 * Clear it now.
			 */
			if (double_lock_balance(this_rq, src_rq)) {
				/* unlocked our runqueue lock */
				struct task_struct *old_next = next;
				next = pick_next_task_rt(this_rq);
				if (next != old_next)
					ret = 1;
			}
			if (likely(src_rq->rt.rt_nr_running <= 1))
				/*
				 * Small chance that this_rq->curr changed
				 * but it's really harmless here.
				 */
				rt_clear_overload(this_rq);
			else
				/*
				 * Heh, the src_rq is now overloaded, since
				 * we already have the src_rq lock, go straight
				 * to pulling tasks from it.
				 */
				goto try_pulling;
			spin_unlock(&src_rq->lock);
			continue;
		}

		/*
		 * We can potentially drop this_rq's lock in
		 * double_lock_balance, and another CPU could
		 * steal our next task - hence we must cause
		 * the caller to recalculate the next task
		 * in that case:
		 */
		if (double_lock_balance(this_rq, src_rq)) {
			struct task_struct *old_next = next;
			next = pick_next_task_rt(this_rq);
			if (next != old_next)
				ret = 1;
		}

		/*
		 * Are there still pullable RT tasks?
		 */
		if (src_rq->rt.rt_nr_running <= 1) {
			spin_unlock(&src_rq->lock);
			continue;
		}

 try_pulling:
		p = pick_next_highest_task_rt(src_rq, this_cpu);

		/*
		 * Do we have an RT task that preempts
		 * the to-be-scheduled task?
		 */
		if (p && (!next || (p->prio < next->prio))) {
			WARN_ON(p == src_rq->curr);
			WARN_ON(!p->se.on_rq);

			/*
			 * There's a chance that p is higher in priority
			 * than what's currently running on its cpu.
			 * This is just that p is wakeing up and hasn't
			 * had a chance to schedule. We only pull
			 * p if it is lower in priority than the
			 * current task on the run queue or
			 * this_rq next task is lower in prio than
			 * the current task on that rq.
			 */
			if (p->prio < src_rq->curr->prio ||
			    (next && next->prio < src_rq->curr->prio))
				goto bail;

			ret = 1;

			deactivate_task(src_rq, p, 0);
			set_task_cpu(p, this_cpu);
			activate_task(this_rq, p, 0);
			/*
			 * We continue with the search, just in
			 * case there's an even higher prio task
			 * in another runqueue. (low likelyhood
			 * but possible)
			 */

			/*
			 * Update next so that we won't pick a task
			 * on another cpu with a priority lower (or equal)
			 * than the one we just picked.
			 */
			next = p;

		}
 bail:
		spin_unlock(&src_rq->lock);
	}

	return ret;
}

static void schedule_balance_rt(struct rq *rq,
				struct task_struct *prev)
{
	/* Try to pull RT tasks here if we lower this rq's prio */
	if (unlikely(rt_task(prev)) &&
	    rq->rt.highest_prio > prev->prio)
		pull_rt_task(rq);
}

static void schedule_tail_balance_rt(struct rq *rq)
{
	/*
@@ -500,6 +664,7 @@ move_one_task_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
}
#else /* CONFIG_SMP */
# define schedule_tail_balance_rt(rq)	do { } while (0)
# define schedule_balance_rt(rq, prev)	do { } while (0)
#endif /* CONFIG_SMP */

static void task_tick_rt(struct rq *rq, struct task_struct *p)