
Commit aa9c4c0f authored by Ingo Molnar

perfcounters: fix task clock counter



Impact: fix per task clock counter precision

Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent 7671581f
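In short: instead of sampling current->se.sum_exec_runtime directly (which is only updated when the scheduler banks runtime), the task clock counter now takes the current runqueue lock and adds the not-yet-banked delta reported by __task_delta_exec(). A rough sketch of the resulting read path, built from the helpers introduced below; the wrapper name task_clock_value and the standalone framing are illustrative only, not part of the patch:

/* Sketch only: mirrors task_clock_perf_counter_val() and its callers below. */
static u64 task_clock_value(struct perf_counter *counter)
{
	struct task_struct *curr = counter->task;	/* must be 'current' */
	unsigned long flags;
	u64 now;

	/* Pin the current runqueue so rq->clock and se.exec_start are stable. */
	curr_rq_lock_irq_save(&flags);

	/*
	 * Runtime already banked by the scheduler, plus the delta accrued
	 * since; passing update=1 also refreshes the runqueue clock first.
	 */
	now = curr->se.sum_exec_runtime + __task_delta_exec(curr, 1);

	curr_rq_unlock_irq_restore(&flags);

	return now;
}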
include/linux/kernel_stat.h +8 −0
@@ -66,7 +66,15 @@ static inline unsigned int kstat_irqs(unsigned int irq)
 	return sum;
 }
 
+
+/*
+ * Lock/unlock the current runqueue - to extract task statistics:
+ */
+extern void curr_rq_lock_irq_save(unsigned long *flags);
+extern void curr_rq_unlock_irq_restore(unsigned long *flags);
+extern unsigned long long __task_delta_exec(struct task_struct *tsk, int update);
 extern unsigned long long task_delta_exec(struct task_struct *);
+
 extern void account_user_time(struct task_struct *, cputime_t);
 extern void account_user_time_scaled(struct task_struct *, cputime_t);
 extern void account_system_time(struct task_struct *, int, cputime_t);
kernel/exit.c +12 −5
@@ -922,6 +922,12 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
 	forget_original_parent(tsk);
 	exit_task_namespaces(tsk);
 
+	/*
+	 * Flush inherited counters to the parent - before the parent
+	 * gets woken up by child-exit notifications.
+	 */
+	perf_counter_exit_task(tsk);
+
 	write_lock_irq(&tasklist_lock);
 	if (group_dead)
 		kill_orphaned_pgrp(tsk->group_leader, NULL);
@@ -1093,11 +1099,6 @@ NORET_TYPE void do_exit(long code)
 	mpol_put(tsk->mempolicy);
 	tsk->mempolicy = NULL;
 #endif
-	/*
-	 * These must happen late, after the PID is not
-	 * hashed anymore, but still at a point that may sleep:
-	 */
-	perf_counter_exit_task(tsk);
 #ifdef CONFIG_FUTEX
 	if (unlikely(!list_empty(&tsk->pi_state_list)))
 		exit_pi_state_list(tsk);
@@ -1121,6 +1122,12 @@ NORET_TYPE void do_exit(long code)
 	if (tsk->splice_pipe)
 		__free_pipe_info(tsk->splice_pipe);
 
+	/*
+	 * These must happen late, after the PID is not
+	 * hashed anymore, but still at a point that may sleep:
+	 */
+	perf_counter_exit_task(tsk);
+
 	preempt_disable();
 	/* causes final put_task_struct in finish_task_switch(). */
 	tsk->state = TASK_DEAD;
kernel/perf_counter.c +54 −16
@@ -18,6 +18,7 @@
 #include <linux/uaccess.h>
 #include <linux/syscalls.h>
 #include <linux/anon_inodes.h>
+#include <linux/kernel_stat.h>
 #include <linux/perf_counter.h>
 
 /*
@@ -106,7 +107,8 @@ static void __perf_counter_remove_from_context(void *info)
 	if (ctx->task && cpuctx->task_ctx != ctx)
 		return;
 
-	spin_lock_irqsave(&ctx->lock, flags);
+	curr_rq_lock_irq_save(&flags);
+	spin_lock(&ctx->lock);
 
 	if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
 		counter->hw_ops->disable(counter);
@@ -135,7 +137,8 @@ static void __perf_counter_remove_from_context(void *info)
 			    perf_max_counters - perf_reserved_percpu);
 	}
 
-	spin_unlock_irqrestore(&ctx->lock, flags);
+	spin_unlock(&ctx->lock);
+	curr_rq_unlock_irq_restore(&flags);
 }


@@ -209,7 +212,8 @@ static void __perf_install_in_context(void *info)
 	if (ctx->task && cpuctx->task_ctx != ctx)
 		return;
 
-	spin_lock_irqsave(&ctx->lock, flags);
+	curr_rq_lock_irq_save(&flags);
+	spin_lock(&ctx->lock);
 
 	/*
 	 * Protect the list operation against NMI by disabling the
@@ -232,7 +236,8 @@ static void __perf_install_in_context(void *info)
 	if (!ctx->task && cpuctx->max_pertask)
 		cpuctx->max_pertask--;
 
-	spin_unlock_irqrestore(&ctx->lock, flags);
+	spin_unlock(&ctx->lock);
+	curr_rq_unlock_irq_restore(&flags);
 }
 
 /*
@@ -438,15 +443,19 @@ int perf_counter_task_disable(void)
 	struct task_struct *curr = current;
 	struct perf_counter_context *ctx = &curr->perf_counter_ctx;
 	struct perf_counter *counter;
+	unsigned long flags;
 	u64 perf_flags;
 	int cpu;
 
 	if (likely(!ctx->nr_counters))
 		return 0;
 
-	local_irq_disable();
+	curr_rq_lock_irq_save(&flags);
 	cpu = smp_processor_id();
 
+	/* force the update of the task clock: */
+	__task_delta_exec(curr, 1);
+
 	perf_counter_task_sched_out(curr, cpu);
 
 	spin_lock(&ctx->lock);
@@ -463,7 +472,7 @@ int perf_counter_task_disable(void)

 	spin_unlock(&ctx->lock);
 
-	local_irq_enable();
+	curr_rq_unlock_irq_restore(&flags);
 
 	return 0;
 }
@@ -473,15 +482,19 @@ int perf_counter_task_enable(void)
 	struct task_struct *curr = current;
 	struct perf_counter_context *ctx = &curr->perf_counter_ctx;
 	struct perf_counter *counter;
+	unsigned long flags;
 	u64 perf_flags;
 	int cpu;
 
 	if (likely(!ctx->nr_counters))
 		return 0;
 
-	local_irq_disable();
+	curr_rq_lock_irq_save(&flags);
 	cpu = smp_processor_id();
 
+	/* force the update of the task clock: */
+	__task_delta_exec(curr, 1);
+
 	spin_lock(&ctx->lock);
 
 	/*
@@ -493,6 +506,7 @@ int perf_counter_task_enable(void)
 		if (counter->state != PERF_COUNTER_STATE_OFF)
 			continue;
 		counter->state = PERF_COUNTER_STATE_INACTIVE;
+		counter->hw_event.disabled = 0;
 	}
 	hw_perf_restore(perf_flags);
 
@@ -500,7 +514,7 @@ int perf_counter_task_enable(void)

 	perf_counter_task_sched_in(curr, cpu);
 
-	local_irq_enable();
+	curr_rq_unlock_irq_restore(&flags);
 
 	return 0;
 }
@@ -540,8 +554,11 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
 static void __read(void *info)
 {
 	struct perf_counter *counter = info;
+	unsigned long flags;
 
+	curr_rq_lock_irq_save(&flags);
 	counter->hw_ops->read(counter);
+	curr_rq_unlock_irq_restore(&flags);
 }
 
 static u64 perf_counter_read(struct perf_counter *counter)
@@ -860,13 +877,27 @@ static const struct hw_perf_counter_ops perf_ops_cpu_clock = {
 	.read		= cpu_clock_perf_counter_read,
 };
 
-static void task_clock_perf_counter_update(struct perf_counter *counter)
+/*
+ * Called from within the scheduler:
+ */
+static u64 task_clock_perf_counter_val(struct perf_counter *counter, int update)
 {
-	u64 prev, now;
+	struct task_struct *curr = counter->task;
+	u64 delta;
+
+	WARN_ON_ONCE(counter->task != current);
+
+	delta = __task_delta_exec(curr, update);
+
+	return curr->se.sum_exec_runtime + delta;
+}
+
+static void task_clock_perf_counter_update(struct perf_counter *counter, u64 now)
+{
+	u64 prev;
 	s64 delta;
 
 	prev = atomic64_read(&counter->hw.prev_count);
-	now = current->se.sum_exec_runtime;
 
 	atomic64_set(&counter->hw.prev_count, now);
 
@@ -877,17 +908,23 @@ static void task_clock_perf_counter_update(struct perf_counter *counter)

 static void task_clock_perf_counter_read(struct perf_counter *counter)
 {
-	task_clock_perf_counter_update(counter);
+	u64 now = task_clock_perf_counter_val(counter, 1);
+
+	task_clock_perf_counter_update(counter, now);
 }
 
 static void task_clock_perf_counter_enable(struct perf_counter *counter)
 {
-	atomic64_set(&counter->hw.prev_count, current->se.sum_exec_runtime);
+	u64 now = task_clock_perf_counter_val(counter, 0);
+
+	atomic64_set(&counter->hw.prev_count, now);
 }
 
 static void task_clock_perf_counter_disable(struct perf_counter *counter)
 {
-	task_clock_perf_counter_update(counter);
+	u64 now = task_clock_perf_counter_val(counter, 0);
+
+	task_clock_perf_counter_update(counter, now);
 }
 
 static const struct hw_perf_counter_ops perf_ops_task_clock = {
@@ -1267,6 +1304,7 @@ __perf_counter_exit_task(struct task_struct *child,
 {
 	struct perf_counter *parent_counter;
 	u64 parent_val, child_val;
+	unsigned long flags;
 	u64 perf_flags;
 
 	/*
@@ -1275,7 +1313,7 @@ __perf_counter_exit_task(struct task_struct *child,
 	 * Be careful about zapping the list - IRQ/NMI context
 	 * could still be processing it:
 	 */
-	local_irq_disable();
+	curr_rq_lock_irq_save(&flags);
 	perf_flags = hw_perf_save_disable();
 
 	if (child_counter->state == PERF_COUNTER_STATE_ACTIVE) {
@@ -1294,7 +1332,7 @@ __perf_counter_exit_task(struct task_struct *child,
 	list_del_init(&child_counter->list_entry);
 
 	hw_perf_restore(perf_flags);
-	local_irq_enable();
+	curr_rq_unlock_irq_restore(&flags);
 
 	parent_counter = child_counter->parent;
 	/*
kernel/sched.c +46 −3
@@ -638,7 +638,7 @@ static inline int cpu_of(struct rq *rq)
 #define task_rq(p)		cpu_rq(task_cpu(p))
 #define cpu_curr(cpu)		(cpu_rq(cpu)->curr)
 
-static inline void update_rq_clock(struct rq *rq)
+inline void update_rq_clock(struct rq *rq)
 {
 	rq->clock = sched_clock_cpu(cpu_of(rq));
 }
@@ -969,6 +969,26 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
 	}
 }
 
+void curr_rq_lock_irq_save(unsigned long *flags)
+	__acquires(rq->lock)
+{
+	struct rq *rq;
+
+	local_irq_save(*flags);
+	rq = cpu_rq(smp_processor_id());
+	spin_lock(&rq->lock);
+}
+
+void curr_rq_unlock_irq_restore(unsigned long *flags)
+	__releases(rq->lock)
+{
+	struct rq *rq;
+
+	rq = cpu_rq(smp_processor_id());
+	spin_unlock(&rq->lock);
+	local_irq_restore(*flags);
+}
+
 void task_rq_unlock_wait(struct task_struct *p)
 {
 	struct rq *rq = task_rq(p);
@@ -2558,7 +2578,6 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev,
 		    struct task_struct *next)
 {
 	fire_sched_out_preempt_notifiers(prev, next);
-	perf_counter_task_sched_out(prev, cpu_of(rq));
 	prepare_lock_switch(rq, next);
 	prepare_arch_switch(next);
 }
@@ -4089,6 +4108,29 @@ DEFINE_PER_CPU(struct kernel_stat, kstat);

 EXPORT_PER_CPU_SYMBOL(kstat);
 
+/*
+ * Return any ns on the sched_clock that have not yet been banked in
+ * @p in case that task is currently running.
+ */
+unsigned long long __task_delta_exec(struct task_struct *p, int update)
+{
+	s64 delta_exec;
+	struct rq *rq;
+
+	rq = task_rq(p);
+	WARN_ON_ONCE(!runqueue_is_locked());
+	WARN_ON_ONCE(!task_current(rq, p));
+
+	if (update)
+		update_rq_clock(rq);
+
+	delta_exec = rq->clock - p->se.exec_start;
+
+	WARN_ON_ONCE(delta_exec < 0);
+
+	return delta_exec;
+}
+
 /*
  * Return any ns on the sched_clock that have not yet been banked in
  * @p in case that task is currently running.
@@ -4316,13 +4358,13 @@ void scheduler_tick(void)
 	update_rq_clock(rq);
 	update_cpu_load(rq);
 	curr->sched_class->task_tick(rq, curr, 0);
+	perf_counter_task_tick(curr, cpu);
 	spin_unlock(&rq->lock);
 
 #ifdef CONFIG_SMP
 	rq->idle_at_tick = idle_cpu(cpu);
 	trigger_load_balance(rq, cpu);
 #endif
-	perf_counter_task_tick(curr, cpu);
 }

#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
@@ -4512,6 +4554,7 @@ asmlinkage void __sched schedule(void)

 	if (likely(prev != next)) {
 		sched_info_switch(prev, next);
+		perf_counter_task_sched_out(prev, cpu);
 
 		rq->nr_switches++;
 		rq->curr = next;