Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit acd58620 authored by Peter Zijlstra's avatar Peter Zijlstra Committed by Thomas Gleixner
Browse files

sched/rtmutex: Refactor rt_mutex_setprio()



With the introduction of SCHED_DEADLINE the whole notion that priority
is a single number is gone, therefore the @prio argument to
rt_mutex_setprio() doesn't make sense anymore.

So rework the code to pass a pi_task instead.

Note this also fixes a problem with pi_top_task caching; previously we
would not set the pointer (call rt_mutex_update_top_task) if the
priority didn't change, this could lead to a stale pointer.

As for the XXX, I think its fine to use pi_task->prio, because if it
differs from waiter->prio, a PI chain update is immenent.

Signed-off-by: default avatarPeter Zijlstra (Intel) <peterz@infradead.org>
Cc: juri.lelli@arm.com
Cc: bigeasy@linutronix.de
Cc: xlpang@redhat.com
Cc: rostedt@goodmis.org
Cc: mathieu.desnoyers@efficios.com
Cc: jdesfossez@efficios.com
Cc: bristot@redhat.com
Link: http://lkml.kernel.org/r/20170323150216.303827095@infradead.org


Signed-off-by: default avatarThomas Gleixner <tglx@linutronix.de>
parent aa2bfe55
Loading
Loading
Loading
Loading
+8 −16
Original line number Diff line number Diff line
@@ -18,28 +18,20 @@ static inline int rt_task(struct task_struct *p)
}

#ifdef CONFIG_RT_MUTEXES
extern int rt_mutex_getprio(struct task_struct *p);
extern void rt_mutex_setprio(struct task_struct *p, int prio);
extern int rt_mutex_get_effective_prio(struct task_struct *task, int newprio);
extern void rt_mutex_update_top_task(struct task_struct *p);
extern struct task_struct *rt_mutex_get_top_task(struct task_struct *task);
/*
 * Must hold either p->pi_lock or task_rq(p)->lock.
 */
static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *p)
{
	return p->pi_top_task;
}
extern void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task);
extern void rt_mutex_adjust_pi(struct task_struct *p);
static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
{
	return tsk->pi_blocked_on != NULL;
}
#else
static inline int rt_mutex_getprio(struct task_struct *p)
{
	return p->normal_prio;
}

static inline int rt_mutex_get_effective_prio(struct task_struct *task,
					      int newprio)
{
	return newprio;
}

static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *task)
{
	return NULL;
+30 −82
Original line number Diff line number Diff line
@@ -322,67 +322,16 @@ rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
	RB_CLEAR_NODE(&waiter->pi_tree_entry);
}

/*
 * Must hold both p->pi_lock and task_rq(p)->lock.
 */
void rt_mutex_update_top_task(struct task_struct *p)
{
	if (!task_has_pi_waiters(p)) {
		p->pi_top_task = NULL;
		return;
	}

	p->pi_top_task = task_top_pi_waiter(p)->task;
}

/*
 * Calculate task priority from the waiter tree priority
 *
 * Return task->normal_prio when the waiter tree is empty or when
 * the waiter is not allowed to do priority boosting
 */
int rt_mutex_getprio(struct task_struct *task)
{
	if (likely(!task_has_pi_waiters(task)))
		return task->normal_prio;

	return min(task_top_pi_waiter(task)->prio,
		   task->normal_prio);
}

/*
 * Must hold either p->pi_lock or task_rq(p)->lock.
 */
struct task_struct *rt_mutex_get_top_task(struct task_struct *task)
{
	return task->pi_top_task;
}

/*
 * Called by sched_setscheduler() to get the priority which will be
 * effective after the change.
 */
int rt_mutex_get_effective_prio(struct task_struct *task, int newprio)
static void rt_mutex_adjust_prio(struct task_struct *p)
{
	struct task_struct *top_task = rt_mutex_get_top_task(task);
	struct task_struct *pi_task = NULL;

	if (!top_task)
		return newprio;
	lockdep_assert_held(&p->pi_lock);

	return min(top_task->prio, newprio);
}
	if (task_has_pi_waiters(p))
		pi_task = task_top_pi_waiter(p)->task;

/*
 * Adjust the priority of a task, after its pi_waiters got modified.
 *
 * This can be both boosting and unboosting. task->pi_lock must be held.
 */
static void __rt_mutex_adjust_prio(struct task_struct *task)
{
	int prio = rt_mutex_getprio(task);

	if (task->prio != prio || dl_prio(prio))
		rt_mutex_setprio(task, prio);
	rt_mutex_setprio(p, pi_task);
}

/*
@@ -742,7 +691,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
		 */
		rt_mutex_dequeue_pi(task, prerequeue_top_waiter);
		rt_mutex_enqueue_pi(task, waiter);
		__rt_mutex_adjust_prio(task);
		rt_mutex_adjust_prio(task);

	} else if (prerequeue_top_waiter == waiter) {
		/*
@@ -758,7 +707,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
		rt_mutex_dequeue_pi(task, waiter);
		waiter = rt_mutex_top_waiter(lock);
		rt_mutex_enqueue_pi(task, waiter);
		__rt_mutex_adjust_prio(task);
		rt_mutex_adjust_prio(task);
	} else {
		/*
		 * Nothing changed. No need to do any priority
@@ -966,7 +915,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
		return -EDEADLK;

	raw_spin_lock(&task->pi_lock);
	__rt_mutex_adjust_prio(task);
	rt_mutex_adjust_prio(task);
	waiter->task = task;
	waiter->lock = lock;
	waiter->prio = task->prio;
@@ -988,7 +937,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
		rt_mutex_dequeue_pi(owner, top_waiter);
		rt_mutex_enqueue_pi(owner, waiter);

		__rt_mutex_adjust_prio(owner);
		rt_mutex_adjust_prio(owner);
		if (owner->pi_blocked_on)
			chain_walk = 1;
	} else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
@@ -1040,13 +989,14 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
	waiter = rt_mutex_top_waiter(lock);

	/*
	 * Remove it from current->pi_waiters. We do not adjust a
	 * possible priority boost right now. We execute wakeup in the
	 * boosted mode and go back to normal after releasing
	 * lock->wait_lock.
	 * Remove it from current->pi_waiters and deboost.
	 *
	 * We must in fact deboost here in order to ensure we call
	 * rt_mutex_setprio() to update p->pi_top_task before the
	 * task unblocks.
	 */
	rt_mutex_dequeue_pi(current, waiter);
	__rt_mutex_adjust_prio(current);
	rt_mutex_adjust_prio(current);

	/*
	 * As we are waking up the top waiter, and the waiter stays
@@ -1058,9 +1008,19 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
	 */
	lock->owner = (void *) RT_MUTEX_HAS_WAITERS;

	raw_spin_unlock(&current->pi_lock);

	/*
	 * We deboosted before waking the top waiter task such that we don't
	 * run two tasks with the 'same' priority (and ensure the
	 * p->pi_top_task pointer points to a blocked task). This however can
	 * lead to priority inversion if we would get preempted after the
	 * deboost but before waking our donor task, hence the preempt_disable()
	 * before unlock.
	 *
	 * Pairs with preempt_enable() in rt_mutex_postunlock();
	 */
	preempt_disable();
	wake_q_add(wake_q, waiter->task);
	raw_spin_unlock(&current->pi_lock);
}

/*
@@ -1095,7 +1055,7 @@ static void remove_waiter(struct rt_mutex *lock,
	if (rt_mutex_has_waiters(lock))
		rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock));

	__rt_mutex_adjust_prio(owner);
	rt_mutex_adjust_prio(owner);

	/* Store the lock on which owner is blocked or NULL */
	next_lock = task_blocked_on_lock(owner);
@@ -1134,8 +1094,7 @@ void rt_mutex_adjust_pi(struct task_struct *task)
	raw_spin_lock_irqsave(&task->pi_lock, flags);

	waiter = task->pi_blocked_on;
	if (!waiter || (waiter->prio == task->prio &&
			!dl_prio(task->prio))) {
	if (!waiter || (waiter->prio == task->prio && !dl_prio(task->prio))) {
		raw_spin_unlock_irqrestore(&task->pi_lock, flags);
		return;
	}
@@ -1389,17 +1348,6 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
	 * Queue the next waiter for wakeup once we release the wait_lock.
	 */
	mark_wakeup_next_waiter(wake_q, lock);

	/*
	 * We should deboost before waking the top waiter task such that
	 * we don't run two tasks with the 'same' priority. This however
	 * can lead to prio-inversion if we would get preempted after
	 * the deboost but before waking our high-prio task, hence the
	 * preempt_disable before unlock. Pairs with preempt_enable() in
	 * rt_mutex_postunlock();
	 */
	preempt_disable();

	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);

	return true; /* call rt_mutex_postunlock() */
+53 −13
Original line number Diff line number Diff line
@@ -3671,10 +3671,25 @@ EXPORT_SYMBOL(default_wake_function);

#ifdef CONFIG_RT_MUTEXES

static inline int __rt_effective_prio(struct task_struct *pi_task, int prio)
{
	if (pi_task)
		prio = min(prio, pi_task->prio);

	return prio;
}

static inline int rt_effective_prio(struct task_struct *p, int prio)
{
	struct task_struct *pi_task = rt_mutex_get_top_task(p);

	return __rt_effective_prio(pi_task, prio);
}

/*
 * rt_mutex_setprio - set the current priority of a task
 * @p: task
 * @prio: prio value (kernel-internal form)
 * @p: task to boost
 * @pi_task: donor task
 *
 * This function changes the 'effective' priority of a task. It does
 * not touch ->normal_prio like __setscheduler().
@@ -3682,18 +3697,42 @@ EXPORT_SYMBOL(default_wake_function);
 * Used by the rt_mutex code to implement priority inheritance
 * logic. Call site only calls if the priority of the task changed.
 */
void rt_mutex_setprio(struct task_struct *p, int prio)
void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
{
	int oldprio, queued, running, queue_flag =
	int prio, oldprio, queued, running, queue_flag =
		DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
	const struct sched_class *prev_class;
	struct rq_flags rf;
	struct rq *rq;

	BUG_ON(prio > MAX_PRIO);
	/* XXX used to be waiter->prio, not waiter->task->prio */
	prio = __rt_effective_prio(pi_task, p->normal_prio);

	/*
	 * If nothing changed; bail early.
	 */
	if (p->pi_top_task == pi_task && prio == p->prio && !dl_prio(prio))
		return;

	rq = __task_rq_lock(p, &rf);
	update_rq_clock(rq);
	/*
	 * Set under pi_lock && rq->lock, such that the value can be used under
	 * either lock.
	 *
	 * Note that there is loads of tricky to make this pointer cache work
	 * right. rt_mutex_slowunlock()+rt_mutex_postunlock() work together to
	 * ensure a task is de-boosted (pi_task is set to NULL) before the
	 * task is allowed to run again (and can exit). This ensures the pointer
	 * points to a blocked task -- which guaratees the task is present.
	 */
	p->pi_top_task = pi_task;

	/*
	 * For FIFO/RR we only need to set prio, if that matches we're done.
	 */
	if (prio == p->prio && !dl_prio(prio))
		goto out_unlock;

	/*
	 * Idle task boosting is a nono in general. There is one
@@ -3713,9 +3752,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
		goto out_unlock;
	}

	rt_mutex_update_top_task(p);

	trace_sched_pi_setprio(p, prio);
	trace_sched_pi_setprio(p, prio); /* broken */
	oldprio = p->prio;

	if (oldprio == prio)
@@ -3739,7 +3776,6 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
	 *          running task
	 */
	if (dl_prio(prio)) {
		struct task_struct *pi_task = rt_mutex_get_top_task(p);
		if (!dl_prio(p->normal_prio) ||
		    (pi_task && dl_entity_preempt(&pi_task->dl, &p->dl))) {
			p->dl.dl_boosted = 1;
@@ -3777,6 +3813,11 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
	balance_callback(rq);
	preempt_enable();
}
#else
static inline int rt_effective_prio(struct task_struct *p, int prio)
{
	return prio;
}
#endif

void set_user_nice(struct task_struct *p, long nice)
@@ -4023,10 +4064,9 @@ static void __setscheduler(struct rq *rq, struct task_struct *p,
	 * Keep a potential priority boosting if called from
	 * sched_setscheduler().
	 */
	if (keep_boost)
		p->prio = rt_mutex_get_effective_prio(p, normal_prio(p));
	else
	p->prio = normal_prio(p);
	if (keep_boost)
		p->prio = rt_effective_prio(p, p->prio);

	if (dl_prio(p->prio))
		p->sched_class = &dl_sched_class;
@@ -4313,7 +4353,7 @@ static int __sched_setscheduler(struct task_struct *p,
		 * the runqueue. This will be done when the task deboost
		 * itself.
		 */
		new_effective_prio = rt_mutex_get_effective_prio(p, newprio);
		new_effective_prio = rt_effective_prio(p, newprio);
		if (new_effective_prio == oldprio)
			queue_flags &= ~DEQUEUE_MOVE;
	}