Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 6b43ae8a authored by John Stultz's avatar John Stultz
Browse files

ntp: Fix leap-second hrtimer livelock



Since commit 7dffa3c6 the ntp
subsystem has used an hrtimer for triggering the leapsecond
adjustment. However, this can cause a potential livelock.

Thomas diagnosed this as the following pattern:
CPU 0                                                    CPU 1
do_adjtimex()
  spin_lock_irq(&ntp_lock);
    process_adjtimex_modes();				 timer_interrupt()
      process_adj_status();                                do_timer()
        ntp_start_leap_timer();                             write_lock(&xtime_lock);
          hrtimer_start();                                  update_wall_time();
             hrtimer_reprogram();                            ntp_tick_length()
               tick_program_event()                            spin_lock(&ntp_lock);
                 clockevents_program_event()
		   ktime_get()
                     seq = req_seqbegin(xtime_lock);

This patch tries to avoid the problem by reverting back to not using
an hrtimer to inject leapseconds, and instead we handle the leapsecond
processing in the second_overflow() function.

The downside to this change is that on systems that support highres
timers, the leap second processing will occur on a HZ tick boundary,
(ie: ~1-10ms, depending on HZ)  after the leap second instead of
possibly sooner (~34us in my tests w/ x86_64 lapic).

This patch applies on top of tip/timers/core.

CC: Sasha Levin <levinsasha928@gmail.com>
CC: Thomas Gleixner <tglx@linutronix.de>
Reported-by: default avatarSasha Levin <levinsasha928@gmail.com>
Diagnoised-by: default avatarThomas Gleixner <tglx@linutronix.de>
Tested-by: default avatarSasha Levin <levinsasha928@gmail.com>
Signed-off-by: default avatarJohn Stultz <john.stultz@linaro.org>
parent 57779dc2
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -252,7 +252,7 @@ extern void ntp_clear(void);
/* Returns how long ticks are at present, in ns / 2^NTP_SCALE_SHIFT. */
extern u64 ntp_tick_length(void);

extern void second_overflow(void);
extern int second_overflow(unsigned long secs);
extern int do_adjtimex(struct timex *);
extern void hardpps(const struct timespec *, const struct timespec *);

+41 −87
Original line number Diff line number Diff line
@@ -34,8 +34,6 @@ unsigned long tick_nsec;
static u64			tick_length;
static u64			tick_length_base;

static struct hrtimer		leap_timer;

#define MAX_TICKADJ		500LL		/* usecs */
#define MAX_TICKADJ_SCALED \
	(((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ)
@@ -381,70 +379,63 @@ u64 ntp_tick_length(void)


/*
 * Leap second processing. If in leap-insert state at the end of the
 * day, the system clock is set back one second; if in leap-delete
 * state, the system clock is set ahead one second.
 * this routine handles the overflow of the microsecond field
 *
 * The tricky bits of code to handle the accurate clock support
 * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
 * They were originally developed for SUN and DEC kernels.
 * All the kudos should go to Dave for this stuff.
 *
 * Also handles leap second processing, and returns leap offset
 */
static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer)
int second_overflow(unsigned long secs)
{
	enum hrtimer_restart res = HRTIMER_NORESTART;
	unsigned long flags;
	s64 delta;
	int leap = 0;
	unsigned long flags;

	spin_lock_irqsave(&ntp_lock, flags);

	/*
	 * Leap second processing. If in leap-insert state at the end of the
	 * day, the system clock is set back one second; if in leap-delete
	 * state, the system clock is set ahead one second.
	 */
	switch (time_state) {
	case TIME_OK:
		if (time_status & STA_INS)
			time_state = TIME_INS;
		else if (time_status & STA_DEL)
			time_state = TIME_DEL;
		break;
	case TIME_INS:
		if (secs % 86400 == 0) {
			leap = -1;
			time_state = TIME_OOP;
			printk(KERN_NOTICE
				"Clock: inserting leap second 23:59:60 UTC\n");
		hrtimer_add_expires_ns(&leap_timer, NSEC_PER_SEC);
		res = HRTIMER_RESTART;
		}
		break;
	case TIME_DEL:
		if ((secs + 1) % 86400 == 0) {
			leap = 1;
			time_tai--;
			time_state = TIME_WAIT;
			printk(KERN_NOTICE
				"Clock: deleting leap second 23:59:59 UTC\n");
		}
		break;
	case TIME_OOP:
		time_tai++;
		time_state = TIME_WAIT;
		/* fall through */
		break;

	case TIME_WAIT:
		if (!(time_status & (STA_INS | STA_DEL)))
			time_state = TIME_OK;
		break;
	}
	spin_unlock_irqrestore(&ntp_lock, flags);

	/*
	 * We have to call this outside of the ntp_lock to keep
	 * the proper locking hierarchy
	 */
	if (leap)
		timekeeping_leap_insert(leap);

	return res;
}

/*
 * this routine handles the overflow of the microsecond field
 *
 * The tricky bits of code to handle the accurate clock support
 * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
 * They were originally developed for SUN and DEC kernels.
 * All the kudos should go to Dave for this stuff.
 */
void second_overflow(void)
{
	s64 delta;
	unsigned long flags;

	spin_lock_irqsave(&ntp_lock, flags);

	/* Bump the maxerror field */
	time_maxerror += MAXFREQ / NSEC_PER_USEC;
@@ -481,8 +472,13 @@ void second_overflow(void)
	tick_length += (s64)(time_adjust * NSEC_PER_USEC / NTP_INTERVAL_FREQ)
							 << NTP_SCALE_SHIFT;
	time_adjust = 0;



out:
	spin_unlock_irqrestore(&ntp_lock, flags);

	return leap;
}

#ifdef CONFIG_GENERIC_CMOS_UPDATE
@@ -544,27 +540,6 @@ static void notify_cmos_timer(void)
static inline void notify_cmos_timer(void) { }
#endif

/*
 * Start the leap seconds timer:
 */
static inline void ntp_start_leap_timer(struct timespec *ts)
{
	long now = ts->tv_sec;

	if (time_status & STA_INS) {
		time_state = TIME_INS;
		now += 86400 - now % 86400;
		hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS);

		return;
	}

	if (time_status & STA_DEL) {
		time_state = TIME_DEL;
		now += 86400 - (now + 1) % 86400;
		hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS);
	}
}

/*
 * Propagate a new txc->status value into the NTP state:
@@ -589,22 +564,6 @@ static inline void process_adj_status(struct timex *txc, struct timespec *ts)
	time_status &= STA_RONLY;
	time_status |= txc->status & ~STA_RONLY;

	switch (time_state) {
	case TIME_OK:
		ntp_start_leap_timer(ts);
		break;
	case TIME_INS:
	case TIME_DEL:
		time_state = TIME_OK;
		ntp_start_leap_timer(ts);
	case TIME_WAIT:
		if (!(time_status & (STA_INS | STA_DEL)))
			time_state = TIME_OK;
		break;
	case TIME_OOP:
		hrtimer_restart(&leap_timer);
		break;
	}
}
/*
 * Called with the xtime lock held, so we can access and modify
@@ -686,9 +645,6 @@ int do_adjtimex(struct timex *txc)
		    (txc->tick <  900000/USER_HZ ||
		     txc->tick > 1100000/USER_HZ))
			return -EINVAL;

		if (txc->modes & ADJ_STATUS && time_state != TIME_OK)
			hrtimer_cancel(&leap_timer);
	}

	if (txc->modes & ADJ_SETOFFSET) {
@@ -1010,6 +966,4 @@ __setup("ntp_tick_adj=", ntp_tick_adj_setup);
void __init ntp_init(void)
{
	ntp_clear();
	hrtimer_init(&leap_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
	leap_timer.function = ntp_leap_second;
}
+6 −14
Original line number Diff line number Diff line
@@ -184,18 +184,6 @@ static void timekeeping_update(bool clearntp)
}


void timekeeping_leap_insert(int leapsecond)
{
	unsigned long flags;

	write_seqlock_irqsave(&timekeeper.lock, flags);
	timekeeper.xtime.tv_sec += leapsecond;
	timekeeper.wall_to_monotonic.tv_sec -= leapsecond;
	timekeeping_update(false);
	write_sequnlock_irqrestore(&timekeeper.lock, flags);

}

/**
 * timekeeping_forward_now - update clock to the current time
 *
@@ -969,9 +957,11 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift)

	timekeeper.xtime_nsec += timekeeper.xtime_interval << shift;
	while (timekeeper.xtime_nsec >= nsecps) {
		int leap;
		timekeeper.xtime_nsec -= nsecps;
		timekeeper.xtime.tv_sec++;
		second_overflow();
		leap = second_overflow(timekeeper.xtime.tv_sec);
		timekeeper.xtime.tv_sec += leap;
	}

	/* Accumulate raw time */
@@ -1082,9 +1072,11 @@ static void update_wall_time(void)
	 * xtime.tv_nsec isn't larger then NSEC_PER_SEC
	 */
	if (unlikely(timekeeper.xtime.tv_nsec >= NSEC_PER_SEC)) {
		int leap;
		timekeeper.xtime.tv_nsec -= NSEC_PER_SEC;
		timekeeper.xtime.tv_sec++;
		second_overflow();
		leap = second_overflow(timekeeper.xtime.tv_sec);
		timekeeper.xtime.tv_sec += leap;
	}

	timekeeping_update(false);