
Commit 887d9dc9 authored by Peter Zijlstra, committed by Thomas Gleixner

hrtimer: Allow hrtimer::function() to free the timer



Currently an hrtimer callback function cannot free its own timer,
because __run_hrtimer() still needs to clear HRTIMER_STATE_CALLBACK
after the callback returns; freeing the timer from the callback would
therefore be a use-after-free.

Solve this by using a scheme similar to regular timers: track the
currently running timer in hrtimer_cpu_base::running.
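
To illustrate what this change permits (this sketch is not part of the patch; the struct and function names are invented for the example), a callback can now free the object that embeds its own hrtimer:

	/*
	 * Hypothetical example only: a callback freeing its own timer, which
	 * this patch makes possible because __run_hrtimer() no longer writes
	 * to timer->state after the callback has run.
	 */
	#include <linux/kernel.h>
	#include <linux/hrtimer.h>
	#include <linux/slab.h>

	struct my_oneshot {			/* invented name, not from the patch */
		struct hrtimer	timer;
		void		*payload;
	};

	static enum hrtimer_restart my_oneshot_fn(struct hrtimer *t)
	{
		struct my_oneshot *o = container_of(t, struct my_oneshot, timer);

		/* ... consume o->payload ... */

		kfree(o);			/* frees the embedded hrtimer as well */
		return HRTIMER_NORESTART;	/* must not ask to be re-armed */
	}

The HRTIMER_NORESTART return matters: as the diff below shows, after the callback __run_hrtimer() only compares the timer pointer against cpu_base->running and never dereferences it again.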

Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: ktkhai@parallels.com
Cc: rostedt@goodmis.org
Cc: juri.lelli@gmail.com
Cc: pang.xunlei@linaro.org
Cc: wanpeng.li@linux.intel.com
Cc: Al Viro <viro@ZenIV.linux.org.uk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: umgwanakikbuti@gmail.com
Link: http://lkml.kernel.org/r/20150611124743.471563047@infradead.org


Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
parent c4bfa3f5
include/linux/hrtimer.h  +16 −25

@@ -53,30 +53,25 @@ enum hrtimer_restart {
  *
  * 0x00		inactive
  * 0x01		enqueued into rbtree
- * 0x02		callback function running
- * 0x04		timer is migrated to another cpu
  *
- * Special cases:
- * 0x03		callback function running and enqueued
- *		(was requeued on another CPU)
- * 0x05		timer was migrated on CPU hotunplug
+ * The callback state is not part of the timer->state because clearing it would
+ * mean touching the timer after the callback, this makes it impossible to free
+ * the timer from the callback function.
  *
- * The "callback function running and enqueued" status is only possible on
- * SMP. It happens for example when a posix timer expired and the callback
+ * Therefore we track the callback state in:
+ *
+ *	timer->base->cpu_base->running == timer
+ *
+ * On SMP it is possible to have a "callback function running and enqueued"
+ * status. It happens for example when a posix timer expired and the callback
  * queued a signal. Between dropping the lock which protects the posix timer
  * and reacquiring the base lock of the hrtimer, another CPU can deliver the
- * signal and rearm the timer. We have to preserve the callback running state,
- * as otherwise the timer could be removed before the softirq code finishes the
- * the handling of the timer.
- *
- * The HRTIMER_STATE_ENQUEUED bit is always or'ed to the current state
- * to preserve the HRTIMER_STATE_CALLBACK in the above scenario.
+ * signal and rearm the timer.
  *
  * All state transitions are protected by cpu_base->lock.
  */
 #define HRTIMER_STATE_INACTIVE	0x00
 #define HRTIMER_STATE_ENQUEUED	0x01
-#define HRTIMER_STATE_CALLBACK	0x02
 
 /**
  * struct hrtimer - the basic hrtimer structure
@@ -163,6 +158,8 @@ enum hrtimer_base_type {
  * struct hrtimer_cpu_base - the per cpu clock bases
  * @lock:		lock protecting the base and associated clock bases
  *			and timers
+ * @seq:		seqcount around __run_hrtimer
+ * @running:		pointer to the currently running hrtimer
  * @cpu:		cpu number
  * @active_bases:	Bitfield to mark bases with active timers
  * @clock_was_set_seq:	Sequence counter of clock was set events
@@ -184,6 +181,8 @@ enum hrtimer_base_type {
  */
 struct hrtimer_cpu_base {
 	raw_spinlock_t			lock;
+	seqcount_t			seq;
+	struct hrtimer			*running;
 	unsigned int			cpu;
 	unsigned int			active_bases;
 	unsigned int			clock_was_set_seq;
@@ -391,15 +390,7 @@ extern ktime_t hrtimer_get_remaining(const struct hrtimer *timer);
 
 extern u64 hrtimer_get_next_event(void);
 
-/*
- * A timer is active, when it is enqueued into the rbtree or the
- * callback function is running or it's in the state of being migrated
- * to another cpu.
- */
-static inline int hrtimer_active(const struct hrtimer *timer)
-{
-	return timer->state != HRTIMER_STATE_INACTIVE;
-}
+extern bool hrtimer_active(const struct hrtimer *timer);
 
 /*
  * Helper function to check, whether the timer is on one of the queues
@@ -415,7 +406,7 @@ static inline int hrtimer_is_queued(struct hrtimer *timer)
  */
 static inline int hrtimer_callback_running(struct hrtimer *timer)
 {
-	return timer->state & HRTIMER_STATE_CALLBACK;
+	return timer->base->cpu_base->running == timer;
 }
 
 /* Forward a hrtimer so it expires after now: */
kernel/time/hrtimer.c  +91 −23

@@ -67,6 +67,7 @@
 DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
 {
 	.lock = __RAW_SPIN_LOCK_UNLOCKED(hrtimer_bases.lock),
+	.seq = SEQCNT_ZERO(hrtimer_bases.seq),
 	.clock_base =
 	{
 		{
@@ -110,6 +111,18 @@ static inline int hrtimer_clockid_to_base(clockid_t clock_id)
  */
 #ifdef CONFIG_SMP
 
+/*
+ * We require the migration_base for lock_hrtimer_base()/switch_hrtimer_base()
+ * such that hrtimer_callback_running() can unconditionally dereference
+ * timer->base->cpu_base
+ */
+static struct hrtimer_cpu_base migration_cpu_base = {
+	.seq = SEQCNT_ZERO(migration_cpu_base),
+	.clock_base = { { .cpu_base = &migration_cpu_base, }, },
+};
+
+#define migration_base	migration_cpu_base.clock_base[0]
+
 /*
  * We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock
  * means that all timers which are tied to this base via timer->base are
@@ -119,8 +132,8 @@ static inline int hrtimer_clockid_to_base(clockid_t clock_id)
  * be found on the lists/queues.
  *
  * When the timer's base is locked, and the timer removed from list, it is
- * possible to set timer->base = NULL and drop the lock: the timer remains
- * locked.
+ * possible to set timer->base = &migration_base and drop the lock: the timer
+ * remains locked.
  */
 static
 struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
@@ -130,7 +143,7 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
 
 	for (;;) {
 		base = timer->base;
-		if (likely(base != NULL)) {
+		if (likely(base != &migration_base)) {
 			raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);
 			if (likely(base == timer->base))
 				return base;
@@ -194,8 +207,8 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
 		if (unlikely(hrtimer_callback_running(timer)))
 			return base;
 
-		/* See the comment in lock_timer_base() */
-		timer->base = NULL;
+		/* See the comment in lock_hrtimer_base() */
+		timer->base = &migration_base;
 		raw_spin_unlock(&base->cpu_base->lock);
 		raw_spin_lock(&new_base->cpu_base->lock);
 
@@ -838,11 +851,7 @@ static int enqueue_hrtimer(struct hrtimer *timer,
 
 	base->cpu_base->active_bases |= 1 << base->index;
 
-	/*
-	 * HRTIMER_STATE_ENQUEUED is or'ed to the current state to preserve the
-	 * state of a possibly running callback.
-	 */
-	timer->state |= HRTIMER_STATE_ENQUEUED;
+	timer->state = HRTIMER_STATE_ENQUEUED;
 
 	return timerqueue_add(&base->active, &timer->node);
 }
@@ -907,14 +916,9 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, bool rest
 		timer_stats_hrtimer_clear_start_info(timer);
 		reprogram = base->cpu_base == this_cpu_ptr(&hrtimer_bases);
 
-		if (!restart) {
-			/*
-			 * We must preserve the CALLBACK state flag here,
-			 * otherwise we could move the timer base in
-			 * switch_hrtimer_base.
-			 */
-			state &= HRTIMER_STATE_CALLBACK;
-		}
+		if (!restart)
+			state = HRTIMER_STATE_INACTIVE;
+
 		__remove_hrtimer(timer, base, state, reprogram);
 		return 1;
 	}
@@ -1115,6 +1119,51 @@ void hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
 }
 EXPORT_SYMBOL_GPL(hrtimer_init);
 
+/*
+ * A timer is active, when it is enqueued into the rbtree or the
+ * callback function is running or it's in the state of being migrated
+ * to another cpu.
+ *
+ * It is important for this function to not return a false negative.
+ */
+bool hrtimer_active(const struct hrtimer *timer)
+{
+	struct hrtimer_cpu_base *cpu_base;
+	unsigned int seq;
+
+	do {
+		cpu_base = READ_ONCE(timer->base->cpu_base);
+		seq = raw_read_seqcount_begin(&cpu_base->seq);
+
+		if (timer->state != HRTIMER_STATE_INACTIVE ||
+		    cpu_base->running == timer)
+			return true;
+
+	} while (read_seqcount_retry(&cpu_base->seq, seq) ||
+		 cpu_base != READ_ONCE(timer->base->cpu_base));
+
+	return false;
+}
+EXPORT_SYMBOL_GPL(hrtimer_active);
+
+/*
+ * The write_seqcount_barrier()s in __run_hrtimer() split the thing into 3
+ * distinct sections:
+ *
+ *  - queued:	the timer is queued
+ *  - callback:	the timer is being ran
+ *  - post:	the timer is inactive or (re)queued
+ *
+ * On the read side we ensure we observe timer->state and cpu_base->running
+ * from the same section, if anything changed while we looked at it, we retry.
+ * This includes timer->base changing because sequence numbers alone are
+ * insufficient for that.
+ *
+ * The sequence numbers are required because otherwise we could still observe
+ * a false negative if the read side got smeared over multiple consequtive
+ * __run_hrtimer() invocations.
+ */
+
 static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base,
 			  struct hrtimer_clock_base *base,
 			  struct hrtimer *timer, ktime_t *now)
@@ -1122,10 +1171,21 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base,
 	enum hrtimer_restart (*fn)(struct hrtimer *);
 	int restart;
 
-	WARN_ON(!irqs_disabled());
+	lockdep_assert_held(&cpu_base->lock);
 
 	debug_deactivate(timer);
-	__remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0);
+	cpu_base->running = timer;
+
+	/*
+	 * Separate the ->running assignment from the ->state assignment.
+	 *
+	 * As with a regular write barrier, this ensures the read side in
+	 * hrtimer_active() cannot observe cpu_base->running == NULL &&
+	 * timer->state == INACTIVE.
+	 */
+	raw_write_seqcount_barrier(&cpu_base->seq);
+
+	__remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 0);
 	timer_stats_account_hrtimer(timer);
 	fn = timer->function;
 
@@ -1141,7 +1201,7 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base,
 	raw_spin_lock(&cpu_base->lock);
 
 	/*
-	 * Note: We clear the CALLBACK bit after enqueue_hrtimer and
+	 * Note: We clear the running state after enqueue_hrtimer and
 	 * we do not reprogramm the event hardware. Happens either in
 	 * hrtimer_start_range_ns() or in hrtimer_interrupt()
 	 *
@@ -1153,9 +1213,17 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base,
 	    !(timer->state & HRTIMER_STATE_ENQUEUED))
 		enqueue_hrtimer(timer, base);
 
-	WARN_ON_ONCE(!(timer->state & HRTIMER_STATE_CALLBACK));
+	/*
+	 * Separate the ->running assignment from the ->state assignment.
+	 *
+	 * As with a regular write barrier, this ensures the read side in
+	 * hrtimer_active() cannot observe cpu_base->running == NULL &&
+	 * timer->state == INACTIVE.
+	 */
+	raw_write_seqcount_barrier(&cpu_base->seq);
 
-	timer->state &= ~HRTIMER_STATE_CALLBACK;
+	WARN_ON_ONCE(cpu_base->running != timer);
+	cpu_base->running = NULL;
 }
 
 static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now)
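
The comments in the diff describe the seqcount scheme only in prose. As a rough, self-contained model of the same idea (a sequence counter plus a "running" pointer, so a reader never observes the timer as inactive while its callback runs), here is a hedged user-space sketch using C11 atomics. It deliberately uses sequentially consistent atomics instead of raw_write_seqcount_barrier()/READ_ONCE(), ignores migration between bases, and all names in it are invented; it models the pattern, it is not the kernel code.

	/* User-space model of the seq + running-pointer scheme used above. */
	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stddef.h>

	struct toy_timer {
		atomic_bool		enqueued;	/* models HRTIMER_STATE_ENQUEUED */
	};

	struct toy_base {
		atomic_uint		seq;		/* models cpu_base->seq */
		struct toy_timer *_Atomic running;	/* models cpu_base->running */
	};

	/* Expiry path: mirrors the ordering in __run_hrtimer(). */
	static void toy_run_timer(struct toy_base *b, struct toy_timer *t,
				  void (*fn)(struct toy_timer *))
	{
		atomic_store(&b->running, t);
		atomic_fetch_add(&b->seq, 2);		/* first "barrier" */
		atomic_store(&t->enqueued, false);

		fn(t);					/* may re-enqueue t, or free it */

		atomic_fetch_add(&b->seq, 2);		/* second "barrier" */
		atomic_store(&b->running, NULL);	/* t itself is not touched again */
	}

	/* Models hrtimer_active(): must never return a false negative. */
	static bool toy_timer_active(struct toy_base *b, struct toy_timer *t)
	{
		unsigned int seq;

		do {
			seq = atomic_load(&b->seq);

			if (atomic_load(&t->enqueued) ||
			    atomic_load(&b->running) == t)
				return true;

		} while (atomic_load(&b->seq) != seq);

		return false;
	}

The two seq bumps bracket the windows in which the queued state and the running pointer are being rewritten, so a reader that straddles such a window is forced to retry instead of reporting an active timer as inactive.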