Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 0adb8f3d authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'timers-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull timer fix from Ingo Molnar:
 "Fix a timer granularity handling race+bug, which would manifest itself
  by spuriously increasing timeouts of some timers (from 1 jiffy to ~500
  jiffies in the worst case measured) in certain nohz states"

* 'timers-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  timers: Fix excessive granularity of new timers after a nohz idle
parents 53ede64d 2fe59f50
Loading
Loading
Loading
Loading
+41 −9
Original line number Diff line number Diff line
@@ -203,6 +203,7 @@ struct timer_base {
	bool			migration_enabled;
	bool			nohz_active;
	bool			is_idle;
	bool			must_forward_clk;
	DECLARE_BITMAP(pending_map, WHEEL_SIZE);
	struct hlist_head	vectors[WHEEL_SIZE];
} ____cacheline_aligned;
@@ -856,13 +857,19 @@ get_target_base(struct timer_base *base, unsigned tflags)

static inline void forward_timer_base(struct timer_base *base)
{
	unsigned long jnow = READ_ONCE(jiffies);
	unsigned long jnow;

	/*
	 * We only forward the base when it's idle and we have a delta between
	 * base clock and jiffies.
	 * We only forward the base when we are idle or have just come out of
	 * idle (must_forward_clk logic), and have a delta between base clock
	 * and jiffies. In the common case, run_timers will take care of it.
	 */
	if (!base->is_idle || (long) (jnow - base->clk) < 2)
	if (likely(!base->must_forward_clk))
		return;

	jnow = READ_ONCE(jiffies);
	base->must_forward_clk = base->is_idle;
	if ((long)(jnow - base->clk) < 2)
		return;

	/*
@@ -938,6 +945,11 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
	 * same array bucket then just return:
	 */
	if (timer_pending(timer)) {
		/*
		 * The downside of this optimization is that it can result in
		 * larger granularity than you would get from adding a new
		 * timer with this expiry.
		 */
		if (timer->expires == expires)
			return 1;

@@ -948,6 +960,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
		 * dequeue/enqueue dance.
		 */
		base = lock_timer_base(timer, &flags);
		forward_timer_base(base);

		clk = base->clk;
		idx = calc_wheel_index(expires, clk);
@@ -964,6 +977,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
		}
	} else {
		base = lock_timer_base(timer, &flags);
		forward_timer_base(base);
	}

	ret = detach_if_pending(timer, base, false);
@@ -991,12 +1005,10 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
			raw_spin_lock(&base->lock);
			WRITE_ONCE(timer->flags,
				   (timer->flags & ~TIMER_BASEMASK) | base->cpu);
			forward_timer_base(base);
		}
	}

	/* Try to forward a stale timer base clock */
	forward_timer_base(base);

	timer->expires = expires;
	/*
	 * If 'idx' was calculated above and the base time did not advance
@@ -1112,6 +1124,7 @@ void add_timer_on(struct timer_list *timer, int cpu)
		WRITE_ONCE(timer->flags,
			   (timer->flags & ~TIMER_BASEMASK) | cpu);
	}
	forward_timer_base(base);

	debug_activate(timer, timer->expires);
	internal_add_timer(base, timer);
@@ -1497,11 +1510,17 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
		if (!is_max_delta)
			expires = basem + (u64)(nextevt - basej) * TICK_NSEC;
		/*
		 * If we expect to sleep more than a tick, mark the base idle:
		 */
		if ((expires - basem) > TICK_NSEC)
		 * If we expect to sleep more than a tick, mark the base idle.
		 * Also the tick is stopped so any added timer must forward
		 * the base clk itself to keep granularity small. This idle
		 * logic is only maintained for the BASE_STD base, deferrable
		 * timers may still see large granularity skew (by design).
		 */
		if ((expires - basem) > TICK_NSEC) {
			base->must_forward_clk = true;
			base->is_idle = true;
		}
	}
	raw_spin_unlock(&base->lock);

	return cmp_next_hrtimer_event(basem, expires);
@@ -1611,6 +1630,19 @@ static __latent_entropy void run_timer_softirq(struct softirq_action *h)
{
	struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);

	/*
	 * must_forward_clk must be cleared before running timers so that any
	 * timer functions that call mod_timer will not try to forward the
	 * base. idle trcking / clock forwarding logic is only used with
	 * BASE_STD timers.
	 *
	 * The deferrable base does not do idle tracking at all, so we do
	 * not forward it. This can result in very large variations in
	 * granularity for deferrable timers, but they can be deferred for
	 * long periods due to idle.
	 */
	base->must_forward_clk = false;

	__run_timers(base);
	if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active)
		__run_timers(this_cpu_ptr(&timer_bases[BASE_DEF]));