Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 9ec26907 authored by Thomas Gleixner's avatar Thomas Gleixner
Browse files

timerfd: Manage cancelable timers in timerfd



Peter is concerned about the extra scan of CLOCK_REALTIME_COS in the
timer interrupt. Yes, I did not think about it, because the solution
was so elegant. I didn't like the extra list in timerfd when it was
proposed some time ago, but with a rcu based list the list walk it's
less horrible than the original global lock, which was held over the
list iteration.

Requested-by: default avatarPeter Zijlstra <peterz@infradead.org>
Signed-off-by: default avatarThomas Gleixner <tglx@linutronix.de>
Reviewed-by: default avatarPeter Zijlstra <peterz@infradead.org>
parent 250f972d
Loading
Loading
Loading
Loading
+74 −31
Original line number Diff line number Diff line
@@ -22,6 +22,7 @@
#include <linux/anon_inodes.h>
#include <linux/timerfd.h>
#include <linux/syscalls.h>
#include <linux/rcupdate.h>

struct timerfd_ctx {
	struct hrtimer tmr;
@@ -31,9 +32,14 @@ struct timerfd_ctx {
	u64 ticks;
	int expired;
	int clockid;
	struct rcu_head rcu;
	struct list_head clist;
	bool might_cancel;
};

static LIST_HEAD(cancel_list);
static DEFINE_SPINLOCK(cancel_lock);

/*
 * This gets called when the timer event triggers. We set the "expired"
 * flag, but we do not re-arm the timer (in case it's necessary,
@@ -53,28 +59,69 @@ static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr)
	return HRTIMER_NORESTART;
}

static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx)
/*
 * Called when the clock was set to cancel the timers in the cancel
 * list.
 */
void timerfd_clock_was_set(void)
{
	ktime_t remaining;
	ktime_t moffs = ktime_get_monotonic_offset();
	struct timerfd_ctx *ctx;
	unsigned long flags;

	remaining = hrtimer_expires_remaining(&ctx->tmr);
	return remaining.tv64 < 0 ? ktime_set(0, 0): remaining;
	rcu_read_lock();
	list_for_each_entry_rcu(ctx, &cancel_list, clist) {
		if (!ctx->might_cancel)
			continue;
		spin_lock_irqsave(&ctx->wqh.lock, flags);
		if (ctx->moffs.tv64 != moffs.tv64) {
			ctx->moffs.tv64 = KTIME_MAX;
			wake_up_locked(&ctx->wqh);
		}
		spin_unlock_irqrestore(&ctx->wqh.lock, flags);
	}
	rcu_read_unlock();
}

static bool timerfd_canceled(struct timerfd_ctx *ctx)
static void timerfd_remove_cancel(struct timerfd_ctx *ctx)
{
	ktime_t moffs;
	if (ctx->might_cancel) {
		ctx->might_cancel = false;
		spin_lock(&cancel_lock);
		list_del_rcu(&ctx->clist);
		spin_unlock(&cancel_lock);
	}
}

	if (!ctx->might_cancel)
static bool timerfd_canceled(struct timerfd_ctx *ctx)
{
	if (!ctx->might_cancel || ctx->moffs.tv64 != KTIME_MAX)
		return false;
	ctx->moffs = ktime_get_monotonic_offset();
	return true;
}

	moffs = ktime_get_monotonic_offset();
static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags)
{
	if (ctx->clockid == CLOCK_REALTIME && (flags & TFD_TIMER_ABSTIME) &&
	    (flags & TFD_TIMER_CANCEL_ON_SET)) {
		if (!ctx->might_cancel) {
			ctx->might_cancel = true;
			spin_lock(&cancel_lock);
			list_add_rcu(&ctx->clist, &cancel_list);
			spin_unlock(&cancel_lock);
		}
	} else if (ctx->might_cancel) {
		timerfd_remove_cancel(ctx);
	}
}

	if (moffs.tv64 == ctx->moffs.tv64)
		return false;
static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx)
{
	ktime_t remaining;

	ctx->moffs = moffs;
	return true;
	remaining = hrtimer_expires_remaining(&ctx->tmr);
	return remaining.tv64 < 0 ? ktime_set(0, 0): remaining;
}

static int timerfd_setup(struct timerfd_ctx *ctx, int flags,
@@ -87,13 +134,6 @@ static int timerfd_setup(struct timerfd_ctx *ctx, int flags,
	htmode = (flags & TFD_TIMER_ABSTIME) ?
		HRTIMER_MODE_ABS: HRTIMER_MODE_REL;

	ctx->might_cancel = false;
	if (htmode == HRTIMER_MODE_ABS && ctx->clockid == CLOCK_REALTIME &&
	    (flags & TFD_TIMER_CANCELON_SET)) {
		clockid = CLOCK_REALTIME_COS;
		ctx->might_cancel = true;
	}

	texp = timespec_to_ktime(ktmr->it_value);
	ctx->expired = 0;
	ctx->ticks = 0;
@@ -113,8 +153,9 @@ static int timerfd_release(struct inode *inode, struct file *file)
{
	struct timerfd_ctx *ctx = file->private_data;

	timerfd_remove_cancel(ctx);
	hrtimer_cancel(&ctx->tmr);
	kfree(ctx);
	kfree_rcu(ctx, rcu);
	return 0;
}

@@ -149,20 +190,20 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
	else
		res = wait_event_interruptible_locked_irq(ctx->wqh, ctx->ticks);

	if (ctx->ticks) {
		ticks = ctx->ticks;

	/*
	 * If clock has changed, we do not care about the
	 * ticks and we do not rearm the timer. Userspace must
	 * reevaluate anyway.
	 */
	if (timerfd_canceled(ctx)) {
			ticks = 0;
		ctx->ticks = 0;
		ctx->expired = 0;
		res = -ECANCELED;
	}

	if (ctx->ticks) {
		ticks = ctx->ticks;

		if (ctx->expired && ctx->tintv.tv64) {
			/*
			 * If tintv.tv64 != 0, this is a periodic timer that
@@ -258,6 +299,8 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
		return PTR_ERR(file);
	ctx = file->private_data;

	timerfd_setup_cancel(ctx, flags);

	/*
	 * We need to stop the existing timer before reprogramming
	 * it to the new values.
+5 −1
Original line number Diff line number Diff line
@@ -155,7 +155,6 @@ enum hrtimer_base_type {
	HRTIMER_BASE_REALTIME,
	HRTIMER_BASE_MONOTONIC,
	HRTIMER_BASE_BOOTTIME,
	HRTIMER_BASE_REALTIME_COS,
	HRTIMER_MAX_CLOCK_BASES,
};

@@ -306,6 +305,11 @@ static inline int hrtimer_is_hres_active(struct hrtimer *timer)
#endif

extern void clock_was_set(void);
#ifdef CONFIG_TIMERFD
extern void timerfd_clock_was_set(void);
#else
static inline void timerfd_clock_was_set(void) { }
#endif
extern void hrtimers_resume(void);

extern ktime_t ktime_get(void);
+0 −6
Original line number Diff line number Diff line
@@ -302,12 +302,6 @@ struct itimerval {
 * The IDs of various hardware clocks:
 */
#define CLOCK_SGI_CYCLE			10

#ifdef __KERNEL__
/* This clock is not exposed to user space */
#define CLOCK_REALTIME_COS		15
#endif

#define MAX_CLOCKS			16
#define CLOCKS_MASK			(CLOCK_REALTIME | CLOCK_MONOTONIC)
#define CLOCKS_MONO			CLOCK_MONOTONIC
+2 −2
Original line number Diff line number Diff line
@@ -19,7 +19,7 @@
 * shared O_* flags.
 */
#define TFD_TIMER_ABSTIME (1 << 0)
#define TFD_TIMER_CANCELON_SET (1 << 1)
#define TFD_TIMER_CANCEL_ON_SET (1 << 1)
#define TFD_CLOEXEC O_CLOEXEC
#define TFD_NONBLOCK O_NONBLOCK

@@ -27,6 +27,6 @@
/* Flags for timerfd_create.  */
#define TFD_CREATE_FLAGS TFD_SHARED_FCNTL_FLAGS
/* Flags for timerfd_settime.  */
#define TFD_SETTIME_FLAGS (TFD_TIMER_ABSTIME | TFD_TIMER_CANCELON_SET)
#define TFD_SETTIME_FLAGS (TFD_TIMER_ABSTIME | TFD_TIMER_CANCEL_ON_SET)

#endif /* _LINUX_TIMERFD_H */
+32 −62
Original line number Diff line number Diff line
@@ -78,11 +78,6 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
			.get_time = &ktime_get_boottime,
			.resolution = KTIME_LOW_RES,
		},
		{
			.index = CLOCK_REALTIME_COS,
			.get_time = &ktime_get_real,
			.resolution = KTIME_LOW_RES,
		},
	}
};

@@ -90,7 +85,6 @@ static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = {
	[CLOCK_REALTIME]	= HRTIMER_BASE_REALTIME,
	[CLOCK_MONOTONIC]	= HRTIMER_BASE_MONOTONIC,
	[CLOCK_BOOTTIME]	= HRTIMER_BASE_BOOTTIME,
	[CLOCK_REALTIME_COS]	= HRTIMER_BASE_REALTIME_COS,
};

static inline int hrtimer_clockid_to_base(clockid_t clock_id)
@@ -116,7 +110,6 @@ static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base)
	base->clock_base[HRTIMER_BASE_REALTIME].softirq_time = xtim;
	base->clock_base[HRTIMER_BASE_MONOTONIC].softirq_time = mono;
	base->clock_base[HRTIMER_BASE_BOOTTIME].softirq_time = boot;
	base->clock_base[HRTIMER_BASE_REALTIME_COS].softirq_time = xtim;
}

/*
@@ -486,8 +479,6 @@ static inline void debug_deactivate(struct hrtimer *timer)
	trace_hrtimer_cancel(timer);
}

static void hrtimer_expire_cancelable(struct hrtimer_cpu_base *cpu_base);

/* High resolution timer related functions */
#ifdef CONFIG_HIGH_RES_TIMERS

@@ -663,7 +654,33 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
	return 0;
}

static void retrigger_next_event(void *arg);
/*
 * Retrigger next event is called after clock was set
 *
 * Called with interrupts disabled via on_each_cpu()
 */
static void retrigger_next_event(void *arg)
{
	struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases);
	struct timespec realtime_offset, xtim, wtm, sleep;

	if (!hrtimer_hres_active())
		return;

	/* Optimized out for !HIGH_RES */
	get_xtime_and_monotonic_and_sleep_offset(&xtim, &wtm, &sleep);
	set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec);

	/* Adjust CLOCK_REALTIME offset */
	raw_spin_lock(&base->lock);
	base->clock_base[HRTIMER_BASE_REALTIME].offset =
		timespec_to_ktime(realtime_offset);
	base->clock_base[HRTIMER_BASE_BOOTTIME].offset =
		timespec_to_ktime(sleep);

	hrtimer_force_reprogram(base, 0);
	raw_spin_unlock(&base->lock);
}

/*
 * Switch to high resolution mode
@@ -711,45 +728,10 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
	return 0;
}
static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { }
static inline void retrigger_next_event(void *arg) { }

#endif /* CONFIG_HIGH_RES_TIMERS */

/*
 * Retrigger next event is called after clock was set
 *
 * Called with interrupts disabled via on_each_cpu()
 */
static void retrigger_next_event(void *arg)
{
	struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases);
	struct timespec realtime_offset, xtim, wtm, sleep;

	if (!hrtimer_hres_active()) {
		raw_spin_lock(&base->lock);
		hrtimer_expire_cancelable(base);
		raw_spin_unlock(&base->lock);
		return;
	}

	/* Optimized out for !HIGH_RES */
	get_xtime_and_monotonic_and_sleep_offset(&xtim, &wtm, &sleep);
	set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec);

	/* Adjust CLOCK_REALTIME offset */
	raw_spin_lock(&base->lock);
	base->clock_base[HRTIMER_BASE_REALTIME].offset =
		timespec_to_ktime(realtime_offset);
	base->clock_base[HRTIMER_BASE_BOOTTIME].offset =
		timespec_to_ktime(sleep);
	base->clock_base[HRTIMER_BASE_REALTIME_COS].offset =
		timespec_to_ktime(realtime_offset);

	hrtimer_expire_cancelable(base);

	hrtimer_force_reprogram(base, 0);
	raw_spin_unlock(&base->lock);
}

/*
 * Clock realtime was set
 *
@@ -763,8 +745,11 @@ static void retrigger_next_event(void *arg)
 */
void clock_was_set(void)
{
#ifdef CONFIG_HIGHRES_TIMERS
	/* Retrigger the CPU local events everywhere */
	on_each_cpu(retrigger_next_event, NULL, 1);
#endif
	timerfd_clock_was_set();
}

/*
@@ -777,6 +762,7 @@ void hrtimers_resume(void)
		  KERN_INFO "hrtimers_resume() called with IRQs enabled!");

	retrigger_next_event(NULL);
	timerfd_clock_was_set();
}

static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer)
@@ -1240,22 +1226,6 @@ static void __run_hrtimer(struct hrtimer *timer, ktime_t *now)
	timer->state &= ~HRTIMER_STATE_CALLBACK;
}

static void hrtimer_expire_cancelable(struct hrtimer_cpu_base *cpu_base)
{
	struct timerqueue_node *node;
	struct hrtimer_clock_base *base;
	ktime_t now = ktime_get_real();

	base = &cpu_base->clock_base[HRTIMER_BASE_REALTIME_COS];

	while ((node = timerqueue_getnext(&base->active))) {
			struct hrtimer *timer;

			timer = container_of(node, struct hrtimer, node);
			__run_hrtimer(timer, &now);
	}
}

#ifdef CONFIG_HIGH_RES_TIMERS

/*