Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 60bda037 authored by Thomas Gleixner
Browse files

posix-cpu-timers: Utilize timerqueue for storage



Using a linear O(N) search for timer insertion affects execution time and
D-cache footprint badly with a larger number of timers.

Switch the storage to a timerqueue which is already used for hrtimers and
alarmtimers. It does not affect the size of struct k_itimer as the it.alarm
union member is still larger.

The extra list head for the expiry list will go away later once the expiry
is moved into task work context.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1908272129220.1939@nanos.tec.linutronix.de
parent 244d49e3
Loading
Loading
Loading
Loading
+49 −16
Original line number Diff line number Diff line
@@ -5,17 +5,11 @@
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/alarmtimer.h>
#include <linux/timerqueue.h>

struct kernel_siginfo;
struct task_struct;

struct cpu_timer_list {
	struct list_head entry;
	u64 expires;
	struct task_struct *task;
	int firing;
};

/*
 * Bit fields within a clockid:
 *
@@ -64,14 +58,58 @@ static inline int clockid_to_fd(const clockid_t clk)

#ifdef CONFIG_POSIX_TIMERS

/**
 * struct cpu_timer - Posix CPU timer representation for k_itimer
 * @node:	timerqueue node carrying the expiry time; used to queue the
 *		timer on the per-task or per-signal-struct timerqueue
 * @head:	timerqueue head this timer was enqueued on; set by
 *		cpu_timer_enqueue(). NOTE: not cleared by cpu_timer_dequeue(),
 *		only by the exit-time cleanup (cleanup_timerqueue())
 * @task:	Pointer to the target task
 * @elist:	List head used to link the timer on the expiry ("firing") list
 * @firing:	Nonzero while the timer is being expired; -1 signals a racing
 *		timer_set/timer_del to retry
 */
struct cpu_timer {
	struct timerqueue_node	node;
	struct timerqueue_head	*head;
	struct task_struct	*task;
	struct list_head	elist;
	int			firing;
};

/*
 * cpu_timer_requeue - put @ctmr back on the queue it was last enqueued on.
 *
 * Propagates the timerqueue_add() result, which reports whether the node
 * became the new earliest-expiring entry on that queue.
 */
static inline bool cpu_timer_requeue(struct cpu_timer *ctmr)
{
	struct timerqueue_head *head = ctmr->head;

	return timerqueue_add(head, &ctmr->node);
}

/*
 * cpu_timer_enqueue - queue @ctmr on @head and remember the queue.
 *
 * Returns the timerqueue_add() result: true when the timer became the
 * new earliest-expiring entry on @head.
 */
static inline bool cpu_timer_enqueue(struct timerqueue_head *head,
				     struct cpu_timer *ctmr)
{
	bool leftmost = timerqueue_add(head, &ctmr->node);

	/* Record the queue so dequeue/requeue can find it later */
	ctmr->head = head;
	return leftmost;
}

/*
 * cpu_timer_dequeue - remove @ctmr from its queue, if it is queued.
 *
 * An empty rbnode means the timer is not enqueued; nothing to do then.
 * Note that ctmr->head is deliberately left untouched.
 */
static inline void cpu_timer_dequeue(struct cpu_timer *ctmr)
{
	if (RB_EMPTY_NODE(&ctmr->node.node))
		return;

	timerqueue_del(ctmr->head, &ctmr->node);
}

/* Expiry time accessor: the expiry lives in the embedded timerqueue node */
static inline u64 cpu_timer_getexpires(struct cpu_timer *ctmr)
{
	return ctmr->node.expires;
}

/* Store the expiry time in the embedded timerqueue node (0 == disarmed) */
static inline void cpu_timer_setexpires(struct cpu_timer *ctmr, u64 exp)
{
	ctmr->node.expires = exp;
}

/**
 * posix_cputimer_base - Container per posix CPU clock
 * @nextevt:		Earliest-expiration cache
 * @cpu_timers:		List heads to queue posix CPU timers
 * @tqhead:		timerqueue head for cpu_timers
 */
struct posix_cputimer_base {
	u64			nextevt;
	struct list_head	cpu_timers;
	struct timerqueue_head	tqhead;
};

/**
@@ -92,14 +130,10 @@ struct posix_cputimers {

static inline void posix_cputimers_init(struct posix_cputimers *pct)
{
	pct->timers_active = 0;
	pct->expiry_active = 0;
	memset(pct, 0, sizeof(*pct));
	pct->bases[0].nextevt = U64_MAX;
	pct->bases[1].nextevt = U64_MAX;
	pct->bases[2].nextevt = U64_MAX;
	INIT_LIST_HEAD(&pct->bases[0].cpu_timers);
	INIT_LIST_HEAD(&pct->bases[1].cpu_timers);
	INIT_LIST_HEAD(&pct->bases[2].cpu_timers);
}

void posix_cputimers_group_init(struct posix_cputimers *pct, u64 cpu_limit);
@@ -113,7 +147,6 @@ static inline void posix_cputimers_rt_watchdog(struct posix_cputimers *pct,
/* Init task static initializer */
#define INIT_CPU_TIMERBASE(b) {						\
	.nextevt	= U64_MAX,					\
	.cpu_timers	= LIST_HEAD_INIT(b.cpu_timers),			\
}

#define INIT_CPU_TIMERBASES(b) {					\
@@ -182,7 +215,7 @@ struct k_itimer {
		struct {
			struct hrtimer	timer;
		} real;
		struct cpu_timer_list	cpu;
		struct cpu_timer	cpu;
		struct {
			struct alarm	alarmtimer;
		} alarm;
+10 −0
Original line number Diff line number Diff line
@@ -43,6 +43,16 @@ static inline void timerqueue_init(struct timerqueue_node *node)
	RB_CLEAR_NODE(&node->node);
}

/* True if @node is currently enqueued on a timerqueue (rbnode non-empty) */
static inline bool timerqueue_node_queued(struct timerqueue_node *node)
{
	return !RB_EMPTY_NODE(&node->node);
}

/*
 * True if @node has a nonzero expiry time set. The u64 expires value is
 * implicitly converted to bool; zero means "no expiry armed".
 */
static inline bool timerqueue_node_expires(struct timerqueue_node *node)
{
	return node->expires;
}

static inline void timerqueue_init_head(struct timerqueue_head *head)
{
	head->rb_root = RB_ROOT_CACHED;
+96 −93
Original line number Diff line number Diff line
@@ -96,19 +96,19 @@ static inline int validate_clock_permissions(const clockid_t clock)
 * Update expiry time from increment, and increase overrun count,
 * given the current clock sample.
 */
static void bump_cpu_timer(struct k_itimer *timer, u64 now)
static u64 bump_cpu_timer(struct k_itimer *timer, u64 now)
{
	u64 delta, incr, expires = timer->it.cpu.node.expires;
	int i;
	u64 delta, incr;

	if (!timer->it_interval)
		return;
		return expires;

	if (now < timer->it.cpu.expires)
		return;
	if (now < expires)
		return expires;

	incr = timer->it_interval;
	delta = now + incr - timer->it.cpu.expires;
	delta = now + incr - expires;

	/* Don't use (incr*2 < delta), incr*2 might overflow. */
	for (i = 0; incr < delta - incr; i++)
@@ -118,10 +118,11 @@ static void bump_cpu_timer(struct k_itimer *timer, u64 now)
		if (delta < incr)
			continue;

		timer->it.cpu.expires += incr;
		timer->it.cpu.node.expires += incr;
		timer->it_overrun += 1LL << i;
		delta -= incr;
	}
	return timer->it.cpu.node.expires;
}

/* Check whether all cache entries contain U64_MAX, i.e. eternal expiry time */
@@ -365,7 +366,7 @@ static int posix_cpu_timer_create(struct k_itimer *new_timer)
		return -EINVAL;

	new_timer->kclock = &clock_posix_cpu;
	INIT_LIST_HEAD(&new_timer->it.cpu.entry);
	timerqueue_init(&new_timer->it.cpu.node);
	new_timer->it.cpu.task = p;
	return 0;
}
@@ -378,10 +379,11 @@ static int posix_cpu_timer_create(struct k_itimer *new_timer)
 */
static int posix_cpu_timer_del(struct k_itimer *timer)
{
	int ret = 0;
	unsigned long flags;
	struct cpu_timer *ctmr = &timer->it.cpu;
	struct task_struct *p = ctmr->task;
	struct sighand_struct *sighand;
	struct task_struct *p = timer->it.cpu.task;
	unsigned long flags;
	int ret = 0;

	if (WARN_ON_ONCE(!p))
		return -EINVAL;
@@ -393,15 +395,15 @@ static int posix_cpu_timer_del(struct k_itimer *timer)
	sighand = lock_task_sighand(p, &flags);
	if (unlikely(sighand == NULL)) {
		/*
		 * We raced with the reaping of the task.
		 * The deletion should have cleared us off the list.
		 * This raced with the reaping of the task. The exit cleanup
		 * should have removed this timer from the timer queue.
		 */
		WARN_ON_ONCE(!list_empty(&timer->it.cpu.entry));
		WARN_ON_ONCE(ctmr->head || timerqueue_node_queued(&ctmr->node));
	} else {
		if (timer->it.cpu.firing)
			ret = TIMER_RETRY;
		else
			list_del(&timer->it.cpu.entry);
			cpu_timer_dequeue(ctmr);

		unlock_task_sighand(p, &flags);
	}
@@ -412,12 +414,16 @@ static int posix_cpu_timer_del(struct k_itimer *timer)
	return ret;
}

static void cleanup_timers_list(struct list_head *head)
static void cleanup_timerqueue(struct timerqueue_head *head)
{
	struct cpu_timer_list *timer, *next;
	struct timerqueue_node *node;
	struct cpu_timer *ctmr;

	list_for_each_entry_safe(timer, next, head, entry)
		list_del_init(&timer->entry);
	while ((node = timerqueue_getnext(head))) {
		timerqueue_del(head, node);
		ctmr = container_of(node, struct cpu_timer, node);
		ctmr->head = NULL;
	}
}

/*
@@ -429,9 +435,9 @@ static void cleanup_timers_list(struct list_head *head)
 */
static void cleanup_timers(struct posix_cputimers *pct)
{
	cleanup_timers_list(&pct->bases[CPUCLOCK_PROF].cpu_timers);
	cleanup_timers_list(&pct->bases[CPUCLOCK_VIRT].cpu_timers);
	cleanup_timers_list(&pct->bases[CPUCLOCK_SCHED].cpu_timers);
	cleanup_timerqueue(&pct->bases[CPUCLOCK_PROF].tqhead);
	cleanup_timerqueue(&pct->bases[CPUCLOCK_VIRT].tqhead);
	cleanup_timerqueue(&pct->bases[CPUCLOCK_SCHED].tqhead);
}

/*
@@ -454,28 +460,18 @@ void posix_cpu_timers_exit_group(struct task_struct *tsk)
 */
static void arm_timer(struct k_itimer *timer)
{
	struct cpu_timer_list *const nt = &timer->it.cpu;
	int clkidx = CPUCLOCK_WHICH(timer->it_clock);
	struct task_struct *p = timer->it.cpu.task;
	u64 newexp = timer->it.cpu.expires;
	struct cpu_timer *ctmr = &timer->it.cpu;
	u64 newexp = cpu_timer_getexpires(ctmr);
	struct task_struct *p = ctmr->task;
	struct posix_cputimer_base *base;
	struct list_head *head, *listpos;
	struct cpu_timer_list *next;

	if (CPUCLOCK_PERTHREAD(timer->it_clock))
		base = p->posix_cputimers.bases + clkidx;
	else
		base = p->signal->posix_cputimers.bases + clkidx;

	listpos = head = &base->cpu_timers;
	list_for_each_entry(next,head, entry) {
		if (nt->expires < next->expires)
			break;
		listpos = &next->entry;
	}
	list_add(&nt->entry, listpos);

	if (listpos != head)
	if (!cpu_timer_enqueue(&base->tqhead, ctmr))
		return;

	/*
@@ -498,24 +494,26 @@ static void arm_timer(struct k_itimer *timer)
 */
static void cpu_timer_fire(struct k_itimer *timer)
{
	struct cpu_timer *ctmr = &timer->it.cpu;

	if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) {
		/*
		 * User don't want any signal.
		 */
		timer->it.cpu.expires = 0;
		cpu_timer_setexpires(ctmr, 0);
	} else if (unlikely(timer->sigq == NULL)) {
		/*
		 * This a special case for clock_nanosleep,
		 * not a normal timer from sys_timer_create.
		 */
		wake_up_process(timer->it_process);
		timer->it.cpu.expires = 0;
		cpu_timer_setexpires(ctmr, 0);
	} else if (!timer->it_interval) {
		/*
		 * One-shot timer.  Clear it as soon as it's fired.
		 */
		posix_timer_event(timer, 0);
		timer->it.cpu.expires = 0;
		cpu_timer_setexpires(ctmr, 0);
	} else if (posix_timer_event(timer, ++timer->it_requeue_pending)) {
		/*
		 * The signal did not get queued because the signal
@@ -539,10 +537,11 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
{
	clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock);
	u64 old_expires, new_expires, old_incr, val;
	struct task_struct *p = timer->it.cpu.task;
	struct cpu_timer *ctmr = &timer->it.cpu;
	struct task_struct *p = ctmr->task;
	struct sighand_struct *sighand;
	unsigned long flags;
	int ret;
	int ret = 0;

	if (WARN_ON_ONCE(!p))
		return -EINVAL;
@@ -562,22 +561,21 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
	 * If p has just been reaped, we can no
	 * longer get any information about it at all.
	 */
	if (unlikely(sighand == NULL)) {
	if (unlikely(sighand == NULL))
		return -ESRCH;
	}

	/*
	 * Disarm any old timer after extracting its expiry time.
	 */

	ret = 0;
	old_incr = timer->it_interval;
	old_expires = timer->it.cpu.expires;
	old_expires = cpu_timer_getexpires(ctmr);

	if (unlikely(timer->it.cpu.firing)) {
		timer->it.cpu.firing = -1;
		ret = TIMER_RETRY;
	} else
		list_del_init(&timer->it.cpu.entry);
	} else {
		cpu_timer_dequeue(ctmr);
	}

	/*
	 * We need to sample the current value to convert the new
@@ -598,18 +596,16 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
			old->it_value.tv_nsec = 0;
		} else {
			/*
			 * Update the timer in case it has
			 * overrun already.  If it has,
			 * we'll report it as having overrun
			 * and with the next reloaded timer
			 * already ticking, though we are
			 * swallowing that pending
			 * notification here to install the
			 * new setting.
			 */
			bump_cpu_timer(timer, val);
			if (val < timer->it.cpu.expires) {
				old_expires = timer->it.cpu.expires - val;
			 * Update the timer in case it has overrun already.
			 * If it has, we'll report it as having overrun and
			 * with the next reloaded timer already ticking,
			 * though we are swallowing that pending
			 * notification here to install the new setting.
			 */
			u64 exp = bump_cpu_timer(timer, val);

			if (val < exp) {
				old_expires = exp - val;
				old->it_value = ns_to_timespec64(old_expires);
			} else {
				old->it_value.tv_nsec = 1;
@@ -638,7 +634,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
	 * For a timer with no notification action, we don't actually
	 * arm the timer (we'll just fake it for timer_gettime).
	 */
	timer->it.cpu.expires = new_expires;
	cpu_timer_setexpires(ctmr, new_expires);
	if (new_expires != 0 && val < new_expires) {
		arm_timer(timer);
	}
@@ -680,8 +676,9 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp)
{
	clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock);
	struct task_struct *p = timer->it.cpu.task;
	u64 now;
	struct cpu_timer *ctmr = &timer->it.cpu;
	u64 now, expires = cpu_timer_getexpires(ctmr);
	struct task_struct *p = ctmr->task;

	if (WARN_ON_ONCE(!p))
		return;
@@ -691,7 +688,7 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp
	 */
	itp->it_interval = ktime_to_timespec64(timer->it_interval);

	if (!timer->it.cpu.expires)
	if (!expires)
		return;

	/*
@@ -713,9 +710,9 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp
			/*
			 * The process has been reaped.
			 * We can't even collect a sample any more.
			 * Call the timer disarmed, nothing else to do.
			 * Disarm the timer, nothing else to do.
			 */
			timer->it.cpu.expires = 0;
			cpu_timer_setexpires(ctmr, 0);
			return;
		} else {
			now = cpu_clock_sample_group(clkid, p, false);
@@ -723,8 +720,8 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp
		}
	}

	if (now < timer->it.cpu.expires) {
		itp->it_value = ns_to_timespec64(timer->it.cpu.expires - now);
	if (now < expires) {
		itp->it_value = ns_to_timespec64(expires - now);
	} else {
		/*
		 * The timer should have expired already, but the firing
@@ -735,36 +732,40 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp
	}
}

static unsigned long long
check_timers_list(struct list_head *timers,
		  struct list_head *firing,
		  unsigned long long curr)
{
	int maxfire = 20;
#define MAX_COLLECTED	20

	while (!list_empty(timers)) {
		struct cpu_timer_list *t;
static u64 collect_timerqueue(struct timerqueue_head *head,
			      struct list_head *firing, u64 now)
{
	struct timerqueue_node *next;
	int i = 0;

		t = list_first_entry(timers, struct cpu_timer_list, entry);
	while ((next = timerqueue_getnext(head))) {
		struct cpu_timer *ctmr;
		u64 expires;

		if (!--maxfire || curr < t->expires)
			return t->expires;
		ctmr = container_of(next, struct cpu_timer, node);
		expires = cpu_timer_getexpires(ctmr);
		/* Limit the number of timers to expire at once */
		if (++i == MAX_COLLECTED || now < expires)
			return expires;

		t->firing = 1;
		list_move_tail(&t->entry, firing);
		ctmr->firing = 1;
		cpu_timer_dequeue(ctmr);
		list_add_tail(&ctmr->elist, firing);
	}

	return U64_MAX;
}

static void collect_posix_cputimers(struct posix_cputimers *pct,
				    u64 *samples, struct list_head *firing)
static void collect_posix_cputimers(struct posix_cputimers *pct, u64 *samples,
				    struct list_head *firing)
{
	struct posix_cputimer_base *base = pct->bases;
	int i;

	for (i = 0; i < CPUCLOCK_MAX; i++, base++) {
		base->nextevt = check_timers_list(&base->cpu_timers, firing,
		base->nextevt = collect_timerqueue(&base->tqhead, firing,
						    samples[i]);
	}
}
@@ -948,7 +949,8 @@ static void check_process_timers(struct task_struct *tsk,
static void posix_cpu_timer_rearm(struct k_itimer *timer)
{
	clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock);
	struct task_struct *p = timer->it.cpu.task;
	struct cpu_timer *ctmr = &timer->it.cpu;
	struct task_struct *p = ctmr->task;
	struct sighand_struct *sighand;
	unsigned long flags;
	u64 now;
@@ -980,7 +982,7 @@ static void posix_cpu_timer_rearm(struct k_itimer *timer)
			 * The process has been reaped.
			 * We can't even collect a sample any more.
			 */
			timer->it.cpu.expires = 0;
			cpu_timer_setexpires(ctmr, 0);
			return;
		} else if (unlikely(p->exit_state) && thread_group_empty(p)) {
			/* If the process is dying, no need to rearm */
@@ -1124,11 +1126,11 @@ void run_posix_cpu_timers(void)
	 * each timer's lock before clearing its firing flag, so no
	 * timer call will interfere.
	 */
	list_for_each_entry_safe(timer, next, &firing, it.cpu.entry) {
	list_for_each_entry_safe(timer, next, &firing, it.cpu.elist) {
		int cpu_firing;

		spin_lock(&timer->it_lock);
		list_del_init(&timer->it.cpu.entry);
		list_del_init(&timer->it.cpu.elist);
		cpu_firing = timer->it.cpu.firing;
		timer->it.cpu.firing = 0;
		/*
@@ -1204,6 +1206,7 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
	timer.it_overrun = -1;
	error = posix_cpu_timer_create(&timer);
	timer.it_process = current;

	if (!error) {
		static struct itimerspec64 zero_it;
		struct restart_block *restart;
@@ -1219,7 +1222,7 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
		}

		while (!signal_pending(current)) {
			if (timer.it.cpu.expires == 0) {
			if (!cpu_timer_getexpires(&timer.it.cpu)) {
				/*
				 * Our timer fired and was reset, below
				 * deletion can not fail.
@@ -1241,7 +1244,7 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
		/*
		 * We were interrupted by a signal.
		 */
		expires = timer.it.cpu.expires;
		expires = cpu_timer_getexpires(&timer.it.cpu);
		error = posix_cpu_timer_set(&timer, 0, &zero_it, &it);
		if (!error) {
			/*