Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit e2defd02 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler fixes from Ingo Molnar:
 "Thiscontains misc fixes: preempt_schedule_common() and io_schedule()
  recursion fixes, sched/dl fixes, a completion_done() revert, two
  sched/rt fixes and a comment update patch"

* 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/rt: Avoid obvious configuration fail
  sched/autogroup: Fix failure to set cpu.rt_runtime_us
  sched/dl: Do update_rq_clock() in yield_task_dl()
  sched: Prevent recursion in io_schedule()
  sched/completion: Serialize completion_done() with complete()
  sched: Fix preempt_schedule_common() triggering tracing recursion
  sched/dl: Prevent enqueue of a sleeping task in dl_task_timer()
  sched: Make dl_task_time() use task_rq_lock()
  sched: Clarify ordering between task_rq_lock() and move_queued_task()
parents b5aeca54 2636ed5f
Loading
Loading
Loading
Loading
+7 −3
Original line number Diff line number Diff line
@@ -363,9 +363,6 @@ extern void show_regs(struct pt_regs *);
 */
extern void show_stack(struct task_struct *task, unsigned long *sp);

void io_schedule(void);
long io_schedule_timeout(long timeout);

extern void cpu_init (void);
extern void trap_init(void);
extern void update_process_times(int user);
@@ -422,6 +419,13 @@ extern signed long schedule_timeout_uninterruptible(signed long timeout);
asmlinkage void schedule(void);
extern void schedule_preempt_disabled(void);

extern long io_schedule_timeout(long timeout);

static inline void io_schedule(void)
{
	io_schedule_timeout(MAX_SCHEDULE_TIMEOUT);
}

struct nsproxy;
struct user_namespace;

+1 −5
Original line number Diff line number Diff line
@@ -87,8 +87,7 @@ static inline struct autogroup *autogroup_create(void)
	 * so we don't have to move tasks around upon policy change,
	 * or flail around trying to allocate bandwidth on the fly.
	 * A bandwidth exception in __sched_setscheduler() allows
	 * the policy change to proceed.  Thereafter, task_group()
	 * returns &root_task_group, so zero bandwidth is required.
	 * the policy change to proceed.
	 */
	free_rt_sched_group(tg);
	tg->rt_se = root_task_group.rt_se;
@@ -115,9 +114,6 @@ bool task_wants_autogroup(struct task_struct *p, struct task_group *tg)
	if (tg != &root_task_group)
		return false;

	if (p->sched_class != &fair_sched_class)
		return false;

	/*
	 * We can only assume the task group can't go away on us if
	 * autogroup_move_group() can see us on ->thread_group list.
+17 −2
Original line number Diff line number Diff line
@@ -274,7 +274,7 @@ bool try_wait_for_completion(struct completion *x)
	 * first without taking the lock so we can
	 * return early in the blocking case.
	 */
	if (!ACCESS_ONCE(x->done))
	if (!READ_ONCE(x->done))
		return 0;

	spin_lock_irqsave(&x->wait.lock, flags);
@@ -297,6 +297,21 @@ EXPORT_SYMBOL(try_wait_for_completion);
 */
bool completion_done(struct completion *x)
{
	return !!ACCESS_ONCE(x->done);
	if (!READ_ONCE(x->done))
		return false;

	/*
	 * If ->done, we need to wait for complete() to release ->wait.lock
	 * otherwise we can end up freeing the completion before complete()
	 * is done referencing it.
	 *
	 * The RMB pairs with complete()'s RELEASE of ->wait.lock and orders
	 * the loads of ->done and ->wait.lock such that we cannot observe
	 * the lock before complete() acquires it while observing the ->done
	 * after it's acquired the lock.
	 */
	smp_rmb();
	spin_unlock_wait(&x->wait.lock);
	return true;
}
EXPORT_SYMBOL(completion_done);
+30 −83
Original line number Diff line number Diff line
@@ -306,66 +306,6 @@ __read_mostly int scheduler_running;
 */
int sysctl_sched_rt_runtime = 950000;

/*
 * __task_rq_lock - lock the rq @p resides on.
 */
static inline struct rq *__task_rq_lock(struct task_struct *p)
	__acquires(rq->lock)
{
	struct rq *rq;

	lockdep_assert_held(&p->pi_lock);

	for (;;) {
		rq = task_rq(p);
		raw_spin_lock(&rq->lock);
		if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
			return rq;
		raw_spin_unlock(&rq->lock);

		while (unlikely(task_on_rq_migrating(p)))
			cpu_relax();
	}
}

/*
 * task_rq_lock - lock p->pi_lock and lock the rq @p resides on.
 */
static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
	__acquires(p->pi_lock)
	__acquires(rq->lock)
{
	struct rq *rq;

	for (;;) {
		raw_spin_lock_irqsave(&p->pi_lock, *flags);
		rq = task_rq(p);
		raw_spin_lock(&rq->lock);
		if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
			return rq;
		raw_spin_unlock(&rq->lock);
		raw_spin_unlock_irqrestore(&p->pi_lock, *flags);

		while (unlikely(task_on_rq_migrating(p)))
			cpu_relax();
	}
}

static void __task_rq_unlock(struct rq *rq)
	__releases(rq->lock)
{
	raw_spin_unlock(&rq->lock);
}

static inline void
task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags)
	__releases(rq->lock)
	__releases(p->pi_lock)
{
	raw_spin_unlock(&rq->lock);
	raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
}

/*
 * this_rq_lock - lock this runqueue and disable interrupts.
 */
@@ -2899,7 +2839,7 @@ void __sched schedule_preempt_disabled(void)
	preempt_disable();
}

static void preempt_schedule_common(void)
static void __sched notrace preempt_schedule_common(void)
{
	do {
		__preempt_count_add(PREEMPT_ACTIVE);
@@ -4418,36 +4358,29 @@ EXPORT_SYMBOL_GPL(yield_to);
 * This task is about to go to sleep on IO. Increment rq->nr_iowait so
 * that process accounting knows that this is a task in IO wait state.
 */
void __sched io_schedule(void)
{
	struct rq *rq = raw_rq();

	delayacct_blkio_start();
	atomic_inc(&rq->nr_iowait);
	blk_flush_plug(current);
	current->in_iowait = 1;
	schedule();
	current->in_iowait = 0;
	atomic_dec(&rq->nr_iowait);
	delayacct_blkio_end();
}
EXPORT_SYMBOL(io_schedule);

long __sched io_schedule_timeout(long timeout)
{
	struct rq *rq = raw_rq();
	int old_iowait = current->in_iowait;
	struct rq *rq;
	long ret;

	current->in_iowait = 1;
	if (old_iowait)
		blk_schedule_flush_plug(current);
	else
		blk_flush_plug(current);

	delayacct_blkio_start();
	rq = raw_rq();
	atomic_inc(&rq->nr_iowait);
	blk_flush_plug(current);
	current->in_iowait = 1;
	ret = schedule_timeout(timeout);
	current->in_iowait = 0;
	current->in_iowait = old_iowait;
	atomic_dec(&rq->nr_iowait);
	delayacct_blkio_end();

	return ret;
}
EXPORT_SYMBOL(io_schedule_timeout);

/**
 * sys_sched_get_priority_max - return maximum RT priority.
@@ -7642,6 +7575,12 @@ static inline int tg_has_rt_tasks(struct task_group *tg)
{
	struct task_struct *g, *p;

	/*
	 * Autogroups do not have RT tasks; see autogroup_create().
	 */
	if (task_group_is_autogroup(tg))
		return 0;

	for_each_process_thread(g, p) {
		if (rt_task(p) && task_group(p) == tg)
			return 1;
@@ -7734,6 +7673,17 @@ static int tg_set_rt_bandwidth(struct task_group *tg,
{
	int i, err = 0;

	/*
	 * Disallowing the root group RT runtime is BAD, it would disallow the
	 * kernel creating (and or operating) RT threads.
	 */
	if (tg == &root_task_group && rt_runtime == 0)
		return -EINVAL;

	/* No period doesn't make any sense. */
	if (rt_period == 0)
		return -EINVAL;

	mutex_lock(&rt_constraints_mutex);
	read_lock(&tasklist_lock);
	err = __rt_schedulable(tg, rt_period, rt_runtime);
@@ -7790,9 +7740,6 @@ static int sched_group_set_rt_period(struct task_group *tg, long rt_period_us)
	rt_period = (u64)rt_period_us * NSEC_PER_USEC;
	rt_runtime = tg->rt_bandwidth.rt_runtime;

	if (rt_period == 0)
		return -EINVAL;

	return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
}

+24 −9
Original line number Diff line number Diff line
@@ -511,16 +511,10 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
						     struct sched_dl_entity,
						     dl_timer);
	struct task_struct *p = dl_task_of(dl_se);
	unsigned long flags;
	struct rq *rq;
again:
	rq = task_rq(p);
	raw_spin_lock(&rq->lock);

	if (rq != task_rq(p)) {
		/* Task was moved, retrying. */
		raw_spin_unlock(&rq->lock);
		goto again;
	}
	rq = task_rq_lock(current, &flags);

	/*
	 * We need to take care of several possible races here:
@@ -541,6 +535,26 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)

	sched_clock_tick();
	update_rq_clock(rq);

	/*
	 * If the throttle happened during sched-out; like:
	 *
	 *   schedule()
	 *     deactivate_task()
	 *       dequeue_task_dl()
	 *         update_curr_dl()
	 *           start_dl_timer()
	 *         __dequeue_task_dl()
	 *     prev->on_rq = 0;
	 *
	 * We can be both throttled and !queued. Replenish the counter
	 * but do not enqueue -- wait for our wakeup to do that.
	 */
	if (!task_on_rq_queued(p)) {
		replenish_dl_entity(dl_se, dl_se);
		goto unlock;
	}

	enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
	if (dl_task(rq->curr))
		check_preempt_curr_dl(rq, p, 0);
@@ -555,7 +569,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
		push_dl_task(rq);
#endif
unlock:
	raw_spin_unlock(&rq->lock);
	task_rq_unlock(rq, current, &flags);

	return HRTIMER_NORESTART;
}
@@ -898,6 +912,7 @@ static void yield_task_dl(struct rq *rq)
		rq->curr->dl.dl_yielded = 1;
		p->dl.runtime = 0;
	}
	update_rq_clock(rq);
	update_curr_dl(rq);
}

Loading