Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 354879bb authored by Peter Zijlstra's avatar Peter Zijlstra Committed by Ingo Molnar
Browse files

sched_clock: fix cpu_clock()



This patch fixes 3 issues:

a) it removes the dependency on jiffies, because jiffies are incremented
   by a single CPU, and the tick is not synchronized between CPUs. Therefore
   relying on it to calculate a window to clip whacky TSC values doesn't work
   as it can drift around.

   So instead use [GTOD, GTOD+TICK_NSEC) as the window.

b) __update_sched_clock() did (roughly speaking):

   delta = sched_clock() - scd->tick_raw;
   clock += delta;

   Which gives exponential growth, instead of linear.

c) allows the sched_clock_cpu() value to warp the u64 without breaking.

the results are more reliable sched_clock() deltas:

           before       after   sched_clock

cpu_clock: 15750        51312   51488
cpu_clock: 59719        51052   50947
cpu_clock: 15879        51249   51061
cpu_clock: 1            50933   51198
cpu_clock: 1            50931   51039
cpu_clock: 1            51093   50981
cpu_clock: 1            51043   51040
cpu_clock: 1            50959   50938
cpu_clock: 1            50981   51011
cpu_clock: 1            51364   51212
cpu_clock: 1            51219   51273
cpu_clock: 1            51389   51048
cpu_clock: 1            51285   51611
cpu_clock: 1            50964   51137
cpu_clock: 1            50973   50968
cpu_clock: 1            50967   50972
cpu_clock: 1            58910   58485
cpu_clock: 1            51082   51025
cpu_clock: 1            50957   50958
cpu_clock: 1            50958   50957
cpu_clock: 1006128      51128   50971
cpu_clock: 1            51107   51155
cpu_clock: 1            51371   51081
cpu_clock: 1            51104   51365
cpu_clock: 1            51363   51309
cpu_clock: 1            51107   51160
cpu_clock: 1            51139   51100
cpu_clock: 1            51216   51136
cpu_clock: 1            51207   51215
cpu_clock: 1            51087   51263
cpu_clock: 1            51249   51177
cpu_clock: 1            51519   51412
cpu_clock: 1            51416   51255
cpu_clock: 1            51591   51594
cpu_clock: 1            50966   51374
cpu_clock: 1            50966   50966
cpu_clock: 1            51291   50948
cpu_clock: 1            50973   50867
cpu_clock: 1            50970   50970
cpu_clock: 998306       50970   50971
cpu_clock: 1            50971   50970
cpu_clock: 1            50970   50970
cpu_clock: 1            50971   50971
cpu_clock: 1            50970   50970
cpu_clock: 1            51351   50970
cpu_clock: 1            50970   51352
cpu_clock: 1            50971   50970
cpu_clock: 1            50970   50970
cpu_clock: 1            51321   50971
cpu_clock: 1            50974   51324

Signed-off-by: default avatarPeter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: default avatarIngo Molnar <mingo@elte.hu>
parent efc2dead
Loading
Loading
Loading
Loading
+34 −50
Original line number Diff line number Diff line
@@ -12,19 +12,17 @@
 *
 * Create a semi stable clock from a mixture of other events, including:
 *  - gtod
 *  - jiffies
 *  - sched_clock()
 *  - explicit idle events
 *
 * We use gtod as base and the unstable clock deltas. The deltas are filtered,
 * making it monotonic and keeping it within an expected window.  This window
 * is set up using jiffies.
 * making it monotonic and keeping it within an expected window.
 *
 * Furthermore, explicit sleep and wakeup hooks allow us to account for time
 * that is otherwise invisible (TSC gets stopped).
 *
 * The clock: sched_clock_cpu() is monotonic per cpu, and should be somewhat
 * consistent between cpus (never more than 1 jiffies difference).
 * consistent between cpus (never more than 2 jiffies difference).
 */
#include <linux/sched.h>
#include <linux/percpu.h>
@@ -54,7 +52,6 @@ struct sched_clock_data {
	 */
	raw_spinlock_t		lock;

	unsigned long		tick_jiffies;
	u64			tick_raw;
	u64			tick_gtod;
	u64			clock;
@@ -75,14 +72,12 @@ static inline struct sched_clock_data *cpu_sdc(int cpu)
void sched_clock_init(void)
{
	u64 ktime_now = ktime_to_ns(ktime_get());
	unsigned long now_jiffies = jiffies;
	int cpu;

	for_each_possible_cpu(cpu) {
		struct sched_clock_data *scd = cpu_sdc(cpu);

		scd->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
		scd->tick_jiffies = now_jiffies;
		scd->tick_raw = 0;
		scd->tick_gtod = ktime_now;
		scd->clock = ktime_now;
@@ -91,47 +86,52 @@ void sched_clock_init(void)
	sched_clock_running = 1;
}

/*
 * min,max except they take wrapping into account
 */

static inline u64 wrap_min(u64 x, u64 y)
{
	return (s64)(x - y) < 0 ? x : y;
}

static inline u64 wrap_max(u64 x, u64 y)
{
	return (s64)(x - y) > 0 ? x : y;
}

/*
 * update the percpu scd from the raw @now value
 *
 *  - filter out backward motion
 *  - use jiffies to generate a min,max window to clip the raw values
 *  - use the GTOD tick value to create a window to filter crazy TSC values
 */
static u64 __update_sched_clock(struct sched_clock_data *scd, u64 now)
{
	unsigned long now_jiffies = jiffies;
	long delta_jiffies = now_jiffies - scd->tick_jiffies;
	u64 clock = scd->clock;
	u64 min_clock, max_clock;
	s64 delta = now - scd->tick_raw;
	u64 clock, min_clock, max_clock;

	WARN_ON_ONCE(!irqs_disabled());
	min_clock = scd->tick_gtod + delta_jiffies * TICK_NSEC;

	if (unlikely(delta < 0)) {
		clock++;
		goto out;
	}
	if (unlikely(delta < 0))
		delta = 0;

	max_clock = min_clock + TICK_NSEC;
	/*
	 * scd->clock = clamp(scd->tick_gtod + delta,
	 * 		      max(scd->tick_gtod, scd->clock),
	 * 		      scd->tick_gtod + TICK_NSEC);
	 */

	if (unlikely(clock + delta > max_clock)) {
		if (clock < max_clock)
			clock = max_clock;
		else
			clock++;
	} else {
		clock += delta;
	}
	clock = scd->tick_gtod + delta;
	min_clock = wrap_max(scd->tick_gtod, scd->clock);
	max_clock = scd->tick_gtod + TICK_NSEC;

 out:
	if (unlikely(clock < min_clock))
		clock = min_clock;
	clock = wrap_max(clock, min_clock);
	clock = wrap_min(clock, max_clock);

	scd->tick_jiffies = now_jiffies;
	scd->clock = clock;

	return clock;
	return scd->clock;
}

static void lock_double_clock(struct sched_clock_data *data1,
@@ -171,7 +171,7 @@ u64 sched_clock_cpu(int cpu)
		 * larger time as the latest time for both
		 * runqueues. (this creates monotonic movement)
		 */
		if (likely(remote_clock < this_clock)) {
		if (likely((s64)(remote_clock - this_clock) < 0)) {
			clock = this_clock;
			scd->clock = clock;
		} else {
@@ -207,14 +207,9 @@ void sched_clock_tick(void)
	now = sched_clock();

	__raw_spin_lock(&scd->lock);
	__update_sched_clock(scd, now);
	/*
	 * update tick_gtod after __update_sched_clock() because that will
	 * already observe 1 new jiffy; adding a new tick_gtod to that would
	 * increase the clock 2 jiffies.
	 */
	scd->tick_raw = now;
	scd->tick_gtod = now_gtod;
	__update_sched_clock(scd, now);
	__raw_spin_unlock(&scd->lock);
}

@@ -232,18 +227,7 @@ EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event);
 */
void sched_clock_idle_wakeup_event(u64 delta_ns)
{
	struct sched_clock_data *scd = this_scd();

	/*
	 * Override the previous timestamp and ignore all
	 * sched_clock() deltas that occured while we idled,
	 * and use the PM-provided delta_ns to advance the
	 * rq clock:
	 */
	__raw_spin_lock(&scd->lock);
	scd->clock += delta_ns;
	__raw_spin_unlock(&scd->lock);

	sched_clock_tick();
	touch_softlockup_watchdog();
}
EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);