Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit cd21debe authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler fixes from Thomas Gleixner:
 "From the scheduler departement:

   - a bunch of sched deadline related fixes which deal with various
     buglets and corner cases.

   - two fixes for the loadavg spikes which are caused by the delayed
     NOHZ accounting"

* 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/deadline: Use deadline instead of period when calculating overflow
  sched/deadline: Throttle a constrained deadline task activated after the deadline
  sched/deadline: Make sure the replenishment timer fires in the next period
  sched/loadavg: Use {READ,WRITE}_ONCE() for sample window
  sched/loadavg: Avoid loadavg spikes caused by delayed NO_HZ accounting
  sched/deadline: Add missing update_rq_clock() in dl_task_timer()
parents b5f13082 2317d5f1
Loading
Loading
Loading
Loading
+57 −6
Original line number Diff line number Diff line
@@ -445,13 +445,13 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se,
 *
 * This function returns true if:
 *
 *   runtime / (deadline - t) > dl_runtime / dl_period ,
 *   runtime / (deadline - t) > dl_runtime / dl_deadline ,
 *
 * IOW we can't recycle current parameters.
 *
 * Notice that the bandwidth check is done against the period. For
 * Notice that the bandwidth check is done against the deadline. For
 * task with deadline equal to period this is the same of using
 * dl_deadline instead of dl_period in the equation above.
 * dl_period instead of dl_deadline in the equation above.
 */
static bool dl_entity_overflow(struct sched_dl_entity *dl_se,
			       struct sched_dl_entity *pi_se, u64 t)
@@ -476,7 +476,7 @@ static bool dl_entity_overflow(struct sched_dl_entity *dl_se,
	 * of anything below microseconds resolution is actually fiction
	 * (but still we want to give the user that illusion >;).
	 */
	left = (pi_se->dl_period >> DL_SCALE) * (dl_se->runtime >> DL_SCALE);
	left = (pi_se->dl_deadline >> DL_SCALE) * (dl_se->runtime >> DL_SCALE);
	right = ((dl_se->deadline - t) >> DL_SCALE) *
		(pi_se->dl_runtime >> DL_SCALE);

@@ -505,10 +505,15 @@ static void update_dl_entity(struct sched_dl_entity *dl_se,
	}
}

static inline u64 dl_next_period(struct sched_dl_entity *dl_se)
{
	return dl_se->deadline - dl_se->dl_deadline + dl_se->dl_period;
}

/*
 * If the entity depleted all its runtime, and if we want it to sleep
 * while waiting for some new execution time to become available, we
 * set the bandwidth enforcement timer to the replenishment instant
 * set the bandwidth replenishment timer to the replenishment instant
 * and try to activate it.
 *
 * Notice that it is important for the caller to know if the timer
@@ -530,7 +535,7 @@ static int start_dl_timer(struct task_struct *p)
	 * that it is actually coming from rq->clock and not from
	 * hrtimer's time base reading.
	 */
	act = ns_to_ktime(dl_se->deadline);
	act = ns_to_ktime(dl_next_period(dl_se));
	now = hrtimer_cb_get_time(timer);
	delta = ktime_to_ns(now) - rq_clock(rq);
	act = ktime_add_ns(act, delta);
@@ -638,6 +643,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
		lockdep_unpin_lock(&rq->lock, rf.cookie);
		rq = dl_task_offline_migration(rq, p);
		rf.cookie = lockdep_pin_lock(&rq->lock);
		update_rq_clock(rq);

		/*
		 * Now that the task has been migrated to the new RQ and we
@@ -689,6 +695,37 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se)
	timer->function = dl_task_timer;
}

/*
 * During the activation, CBS checks if it can reuse the current task's
 * runtime and period. If the deadline of the task is in the past, CBS
 * cannot use the runtime, and so it replenishes the task. This rule
 * works fine for implicit deadline tasks (deadline == period), and the
 * CBS was designed for implicit deadline tasks. However, a task with
 * constrained deadline (deadine < period) might be awakened after the
 * deadline, but before the next period. In this case, replenishing the
 * task would allow it to run for runtime / deadline. As in this case
 * deadline < period, CBS enables a task to run for more than the
 * runtime / period. In a very loaded system, this can cause a domino
 * effect, making other tasks miss their deadlines.
 *
 * To avoid this problem, in the activation of a constrained deadline
 * task after the deadline but before the next period, throttle the
 * task and set the replenishing timer to the begin of the next period,
 * unless it is boosted.
 */
static inline void dl_check_constrained_dl(struct sched_dl_entity *dl_se)
{
	struct task_struct *p = dl_task_of(dl_se);
	struct rq *rq = rq_of_dl_rq(dl_rq_of_se(dl_se));

	if (dl_time_before(dl_se->deadline, rq_clock(rq)) &&
	    dl_time_before(rq_clock(rq), dl_next_period(dl_se))) {
		if (unlikely(dl_se->dl_boosted || !start_dl_timer(p)))
			return;
		dl_se->dl_throttled = 1;
	}
}

static
int dl_runtime_exceeded(struct sched_dl_entity *dl_se)
{
@@ -922,6 +959,11 @@ static void dequeue_dl_entity(struct sched_dl_entity *dl_se)
	__dequeue_dl_entity(dl_se);
}

static inline bool dl_is_constrained(struct sched_dl_entity *dl_se)
{
	return dl_se->dl_deadline < dl_se->dl_period;
}

static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
{
	struct task_struct *pi_task = rt_mutex_get_top_task(p);
@@ -947,6 +989,15 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
		return;
	}

	/*
	 * Check if a constrained deadline task was activated
	 * after the deadline but before the next period.
	 * If that is the case, the task will be throttled and
	 * the replenishment timer will be set to the next period.
	 */
	if (!p->dl.dl_throttled && dl_is_constrained(&p->dl))
		dl_check_constrained_dl(&p->dl);

	/*
	 * If p is throttled, we do nothing. In fact, if it exhausted
	 * its budget it needs a replenishment and, since it now is on
+12 −8
Original line number Diff line number Diff line
@@ -169,7 +169,7 @@ static inline int calc_load_write_idx(void)
	 * If the folding window started, make sure we start writing in the
	 * next idle-delta.
	 */
	if (!time_before(jiffies, calc_load_update))
	if (!time_before(jiffies, READ_ONCE(calc_load_update)))
		idx++;

	return idx & 1;
@@ -202,8 +202,9 @@ void calc_load_exit_idle(void)
	struct rq *this_rq = this_rq();

	/*
	 * If we're still before the sample window, we're done.
	 * If we're still before the pending sample window, we're done.
	 */
	this_rq->calc_load_update = READ_ONCE(calc_load_update);
	if (time_before(jiffies, this_rq->calc_load_update))
		return;

@@ -212,7 +213,6 @@ void calc_load_exit_idle(void)
	 * accounted through the nohz accounting, so skip the entire deal and
	 * sync up for the next window.
	 */
	this_rq->calc_load_update = calc_load_update;
	if (time_before(jiffies, this_rq->calc_load_update + 10))
		this_rq->calc_load_update += LOAD_FREQ;
}
@@ -308,13 +308,15 @@ calc_load_n(unsigned long load, unsigned long exp,
 */
static void calc_global_nohz(void)
{
	unsigned long sample_window;
	long delta, active, n;

	if (!time_before(jiffies, calc_load_update + 10)) {
	sample_window = READ_ONCE(calc_load_update);
	if (!time_before(jiffies, sample_window + 10)) {
		/*
		 * Catch-up, fold however many we are behind still
		 */
		delta = jiffies - calc_load_update - 10;
		delta = jiffies - sample_window - 10;
		n = 1 + (delta / LOAD_FREQ);

		active = atomic_long_read(&calc_load_tasks);
@@ -324,7 +326,7 @@ static void calc_global_nohz(void)
		avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n);
		avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n);

		calc_load_update += n * LOAD_FREQ;
		WRITE_ONCE(calc_load_update, sample_window + n * LOAD_FREQ);
	}

	/*
@@ -352,9 +354,11 @@ static inline void calc_global_nohz(void) { }
 */
void calc_global_load(unsigned long ticks)
{
	unsigned long sample_window;
	long active, delta;

	if (time_before(jiffies, calc_load_update + 10))
	sample_window = READ_ONCE(calc_load_update);
	if (time_before(jiffies, sample_window + 10))
		return;

	/*
@@ -371,7 +375,7 @@ void calc_global_load(unsigned long ticks)
	avenrun[1] = calc_load(avenrun[1], EXP_5, active);
	avenrun[2] = calc_load(avenrun[2], EXP_15, active);

	calc_load_update += LOAD_FREQ;
	WRITE_ONCE(calc_load_update, sample_window + LOAD_FREQ);

	/*
	 * In case we idled for multiple LOAD_FREQ intervals, catch up in bulk.