Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 467f9957 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'perfcounters-core-for-linus' of...

Merge branch 'perfcounters-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'perfcounters-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (58 commits)
  perf_counter: Fix perf_copy_attr() pointer arithmetic
  perf utils: Use a define for the maximum length of a trace event
  perf: Add timechart help text and add timechart to "perf help"
  tracing, x86, cpuidle: Move the end point of a C state in the power tracer
  perf utils: Be consistent about minimum text size in the svghelper
  perf timechart: Add "perf timechart record"
  perf: Add the timechart tool
  perf: Add a SVG helper library file
  tracing, perf: Convert the power tracer into an event tracer
  perf: Add a sample_event type to the event_union
  perf: Allow perf utilities to have "callback" options without arguments
  perf: Store trace event name/id pairs in perf.data
  perf: Add a timestamp to fork events
  sched_clock: Make it NMI safe
  perf_counter: Fix up swcounter throttling
  x86, perf_counter, bts: Optimize BTS overflow handling
  perf sched: Add --input=file option to builtin-sched.c
  perf trace: Sample timestamp and cpu when using record flag
  perf tools: Increase MAX_EVENT_LENGTH
  perf tools: Fix memory leak in read_ftrace_printk()
  ...
parents 78f28b7c cdf8073d
Loading
Loading
Loading
Loading

Documentation/trace/power.txt

deleted100644 → 0
+0 −17
Original line number Diff line number Diff line
The power tracer collects detailed information about C-state and P-state
transitions, instead of just looking at the high-level "average"
information.

There is a helper script found in scrips/tracing/power.pl in the kernel
sources which can be used to parse this information and create a
Scalable Vector Graphics (SVG) picture from the trace data.

To use this tracer:

	echo 0 > /sys/kernel/debug/tracing/tracing_enabled
	echo power > /sys/kernel/debug/tracing/current_tracer
	echo 1 > /sys/kernel/debug/tracing/tracing_enabled
	sleep 1
	echo 0 > /sys/kernel/debug/tracing/tracing_enabled
	cat /sys/kernel/debug/tracing/trace | \
		perl scripts/tracing/power.pl > out.sv
+2 −5
Original line number Diff line number Diff line
@@ -33,7 +33,7 @@
#include <linux/cpufreq.h>
#include <linux/compiler.h>
#include <linux/dmi.h>
#include <trace/power.h>
#include <trace/events/power.h>

#include <linux/acpi.h>
#include <linux/io.h>
@@ -72,8 +72,6 @@ static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data);

static DEFINE_PER_CPU(struct aperfmperf, old_perf);

DEFINE_TRACE(power_mark);

/* acpi_perf_data is a pointer to percpu data. */
static struct acpi_processor_performance *acpi_perf_data;

@@ -332,7 +330,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
	unsigned int next_perf_state = 0; /* Index into perf table */
	unsigned int i;
	int result = 0;
	struct power_trace it;

	dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu);

@@ -364,7 +361,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
		}
	}

	trace_power_mark(&it, POWER_PSTATE, next_perf_state);
	trace_power_frequency(POWER_PSTATE, data->freq_table[next_state].frequency);

	switch (data->cpu_feature) {
	case SYSTEM_INTEL_MSR_CAPABLE:
+37 −23
Original line number Diff line number Diff line
@@ -36,10 +36,10 @@ static u64 perf_counter_mask __read_mostly;
#define BTS_RECORD_SIZE		24

/* The size of a per-cpu BTS buffer in bytes: */
#define BTS_BUFFER_SIZE		(BTS_RECORD_SIZE * 1024)
#define BTS_BUFFER_SIZE		(BTS_RECORD_SIZE * 2048)

/* The BTS overflow threshold in bytes from the end of the buffer: */
#define BTS_OVFL_TH		(BTS_RECORD_SIZE * 64)
#define BTS_OVFL_TH		(BTS_RECORD_SIZE * 128)


/*
@@ -1488,8 +1488,7 @@ void perf_counter_print_debug(void)
	local_irq_restore(flags);
}

static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc,
				       struct perf_sample_data *data)
static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc)
{
	struct debug_store *ds = cpuc->ds;
	struct bts_record {
@@ -1498,8 +1497,11 @@ static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc,
		u64	flags;
	};
	struct perf_counter *counter = cpuc->counters[X86_PMC_IDX_FIXED_BTS];
	unsigned long orig_ip = data->regs->ip;
	struct bts_record *at, *top;
	struct perf_output_handle handle;
	struct perf_event_header header;
	struct perf_sample_data data;
	struct pt_regs regs;

	if (!counter)
		return;
@@ -1510,19 +1512,38 @@ static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc,
	at  = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
	top = (struct bts_record *)(unsigned long)ds->bts_index;

	if (top <= at)
		return;

	ds->bts_index = ds->bts_buffer_base;


	data.period	= counter->hw.last_period;
	data.addr	= 0;
	regs.ip		= 0;

	/*
	 * Prepare a generic sample, i.e. fill in the invariant fields.
	 * We will overwrite the from and to address before we output
	 * the sample.
	 */
	perf_prepare_sample(&header, &data, counter, &regs);

	if (perf_output_begin(&handle, counter,
			      header.size * (top - at), 1, 1))
		return;

	for (; at < top; at++) {
		data->regs->ip	= at->from;
		data->addr	= at->to;
		data.ip		= at->from;
		data.addr	= at->to;

		perf_counter_output(counter, 1, data);
		perf_output_sample(&handle, &header, &data, counter);
	}

	data->regs->ip	= orig_ip;
	data->addr	= 0;
	perf_output_end(&handle);

	/* There's new data available. */
	counter->hw.interrupts++;
	counter->pending_kill = POLL_IN;
}

@@ -1552,13 +1573,9 @@ static void x86_pmu_disable(struct perf_counter *counter)
	x86_perf_counter_update(counter, hwc, idx);

	/* Drain the remaining BTS records. */
	if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
		struct perf_sample_data data;
		struct pt_regs regs;
	if (unlikely(idx == X86_PMC_IDX_FIXED_BTS))
		intel_pmu_drain_bts_buffer(cpuc);

		data.regs = &regs;
		intel_pmu_drain_bts_buffer(cpuc, &data);
	}
	cpuc->counters[idx] = NULL;
	clear_bit(idx, cpuc->used_mask);

@@ -1619,7 +1636,6 @@ static int p6_pmu_handle_irq(struct pt_regs *regs)
	int idx, handled = 0;
	u64 val;

	data.regs = regs;
	data.addr = 0;

	cpuc = &__get_cpu_var(cpu_hw_counters);
@@ -1644,7 +1660,7 @@ static int p6_pmu_handle_irq(struct pt_regs *regs)
		if (!x86_perf_counter_set_period(counter, hwc, idx))
			continue;

		if (perf_counter_overflow(counter, 1, &data))
		if (perf_counter_overflow(counter, 1, &data, regs))
			p6_pmu_disable_counter(hwc, idx);
	}

@@ -1665,13 +1681,12 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
	int bit, loops;
	u64 ack, status;

	data.regs = regs;
	data.addr = 0;

	cpuc = &__get_cpu_var(cpu_hw_counters);

	perf_disable();
	intel_pmu_drain_bts_buffer(cpuc, &data);
	intel_pmu_drain_bts_buffer(cpuc);
	status = intel_pmu_get_status();
	if (!status) {
		perf_enable();
@@ -1702,7 +1717,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)

		data.period = counter->hw.last_period;

		if (perf_counter_overflow(counter, 1, &data))
		if (perf_counter_overflow(counter, 1, &data, regs))
			intel_pmu_disable_counter(&counter->hw, bit);
	}

@@ -1729,7 +1744,6 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
	int idx, handled = 0;
	u64 val;

	data.regs = regs;
	data.addr = 0;

	cpuc = &__get_cpu_var(cpu_hw_counters);
@@ -1754,7 +1768,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
		if (!x86_perf_counter_set_period(counter, hwc, idx))
			continue;

		if (perf_counter_overflow(counter, 1, &data))
		if (perf_counter_overflow(counter, 1, &data, regs))
			amd_pmu_disable_counter(hwc, idx);
	}

+6 −19
Original line number Diff line number Diff line
@@ -9,7 +9,7 @@
#include <linux/pm.h>
#include <linux/clockchips.h>
#include <linux/random.h>
#include <trace/power.h>
#include <trace/events/power.h>
#include <asm/system.h>
#include <asm/apic.h>
#include <asm/syscalls.h>
@@ -25,9 +25,6 @@ EXPORT_SYMBOL(idle_nomwait);

struct kmem_cache *task_xstate_cachep;

DEFINE_TRACE(power_start);
DEFINE_TRACE(power_end);

int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
	*dst = *src;
@@ -299,9 +296,7 @@ static inline int hlt_use_halt(void)
void default_idle(void)
{
	if (hlt_use_halt()) {
		struct power_trace it;

		trace_power_start(&it, POWER_CSTATE, 1);
		trace_power_start(POWER_CSTATE, 1);
		current_thread_info()->status &= ~TS_POLLING;
		/*
		 * TS_POLLING-cleared state must be visible before we
@@ -314,7 +309,6 @@ void default_idle(void)
		else
			local_irq_enable();
		current_thread_info()->status |= TS_POLLING;
		trace_power_end(&it);
	} else {
		local_irq_enable();
		/* loop is done by the caller */
@@ -372,9 +366,7 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
 */
void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
{
	struct power_trace it;

	trace_power_start(&it, POWER_CSTATE, (ax>>4)+1);
	trace_power_start(POWER_CSTATE, (ax>>4)+1);
	if (!need_resched()) {
		if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
			clflush((void *)&current_thread_info()->flags);
@@ -384,15 +376,13 @@ void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
		if (!need_resched())
			__mwait(ax, cx);
	}
	trace_power_end(&it);
}

/* Default MONITOR/MWAIT with no hints, used for default C1 state */
static void mwait_idle(void)
{
	struct power_trace it;
	if (!need_resched()) {
		trace_power_start(&it, POWER_CSTATE, 1);
		trace_power_start(POWER_CSTATE, 1);
		if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
			clflush((void *)&current_thread_info()->flags);

@@ -402,7 +392,6 @@ static void mwait_idle(void)
			__sti_mwait(0, 0);
		else
			local_irq_enable();
		trace_power_end(&it);
	} else
		local_irq_enable();
}
@@ -414,13 +403,11 @@ static void mwait_idle(void)
 */
static void poll_idle(void)
{
	struct power_trace it;

	trace_power_start(&it, POWER_CSTATE, 0);
	trace_power_start(POWER_CSTATE, 0);
	local_irq_enable();
	while (!need_resched())
		cpu_relax();
	trace_power_end(&it);
	trace_power_end(0);
}

/*
+2 −0
Original line number Diff line number Diff line
@@ -17,6 +17,7 @@
#include <linux/cpuidle.h>
#include <linux/ktime.h>
#include <linux/hrtimer.h>
#include <trace/events/power.h>

#include "cpuidle.h"

@@ -91,6 +92,7 @@ static void cpuidle_idle_call(void)
	/* give the governor an opportunity to reflect on the outcome */
	if (cpuidle_curr_governor->reflect)
		cpuidle_curr_governor->reflect(dev);
	trace_power_end(0);
}

/**
Loading