Merge tag 'perf-core-for-mingo-20160606' of... (aa3a655b) · Commits · e / devices / android_kernel_oneplus_sm7250

tools/lib/api/Makefile

+1 −0

Original line number	Diff line number	Diff line
		@@ -10,6 +10,7 @@ endif

		CC = $(CROSS_COMPILE)gcc
		AR = $(CROSS_COMPILE)ar
		LD = $(CROSS_COMPILE)ld

		MAKEFLAGS += --no-print-directory

tools/perf/Documentation/perf-stat.txt

+32 −0

Original line number	Diff line number	Diff line
		@@ -204,6 +204,38 @@ Aggregate counts per physical processor for system-wide mode measurements.
		--no-aggr::
		Do not aggregate counts across all monitored CPUs.

		--topdown::
		Print top down level 1 metrics if supported by the CPU. This makes it
		possible to determine bottlenecks in the CPU pipeline for CPU bound
		workloads, by breaking the cycles consumed down into frontend bound,
		backend bound, bad speculation and retiring.

		Frontend bound means that the CPU cannot fetch and decode instructions fast
		enough. Backend bound means that computation or memory access is the
		bottleneck. Bad Speculation means that the CPU wasted cycles due to branch
		mispredictions and similar issues. Retiring means that the CPU computed without
		an apparent bottleneck. The bottleneck is only the real bottleneck
		if the workload is actually bound by the CPU and not by something else.

		For best results it is usually a good idea to use it with interval
		mode like -I 1000, as the bottleneck of workloads can change often.

		The top down metrics are collected per core instead of per
		CPU thread. Per core mode is automatically enabled
		and -a (global monitoring) is needed, requiring root rights or
		kernel.perf_event_paranoid=-1.

		Topdown uses the full Performance Monitoring Unit, and for best
		results needs the NMI watchdog to be disabled (as root):
		echo 0 > /proc/sys/kernel/nmi_watchdog
		Otherwise the bottlenecks may be inconsistent
		on workloads with changing phases.

		This enables --metric-only, unless overridden with --no-metric-only.

		To interpret the results it is usually necessary to know which
		CPUs the workload runs on. If needed the CPUs can be forced using
		taskset.

		EXAMPLES
		--------

tools/perf/arch/x86/util/Build

+1 −0

Original line number	Diff line number	Diff line
		@@ -3,6 +3,7 @@ libperf-y += tsc.o
		libperf-y += pmu.o
		libperf-y += kvm-stat.o
		libperf-y += perf_regs.o
		libperf-y += group.o

		libperf-$(CONFIG_DWARF) += dwarf-regs.o
		libperf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o

tools/perf/arch/x86/util/group.c

0 → 100644

+27 −0

Original line number	Diff line number	Diff line
		#include <stdio.h>
		#include "api/fs/fs.h"
		#include "util/group.h"

		/*
		 * Decide whether the top down events may be put into a single group.
		 * Without a group the results may be bad due to multiplexing.
		 *
		 * Returns true only when "kernel/nmi_watchdog" could be read and its
		 * value is not positive. When the value is positive, *warn is set to
		 * true and false is returned; in every other case *warn is left
		 * untouched, so the caller is expected to initialise it.
		 */
		bool arch_topdown_check_group(bool *warn)
		{
			int watchdog;
			bool readable = sysctl__read_int("kernel/nmi_watchdog", &watchdog) >= 0;

			/* Reading the sysctl failed: no group, but no warning either. */
			if (!readable)
				return false;

			/* Value is zero or negative: grouping is allowed. */
			if (watchdog <= 0)
				return true;

			*warn = true;
			return false;
		}

		/*
		 * Print a two-line notice on stderr: the NMI watchdog is enabled while
		 * top down is in use, followed by the command that disables it.
		 */
		void arch_topdown_group_warn(void)
		{
			static const char notice[] =
				"nmi_watchdog enabled with topdown. May give wrong results.\n"
				"Disable with echo 0 > /proc/sys/kernel/nmi_watchdog\n";

			/* The text has no conversions, so fputs emits it unchanged. */
			fputs(notice, stderr);
		}

tools/perf/builtin-script.c

+13 −10

Original line number	Diff line number	Diff line
		@@ -339,7 +339,7 @@ static void set_print_ip_opts(struct perf_event_attr *attr)
		*/
		static int perf_session__check_output_opt(struct perf_session *session)
		{
		int j;
		unsigned int j;
		struct perf_evsel *evsel;

		for (j = 0; j < PERF_TYPE_MAX; ++j) {
		@@ -388,9 +388,10 @@ static int perf_session__check_output_opt(struct perf_session *session)
		struct perf_event_attr *attr;

		j = PERF_TYPE_TRACEPOINT;
		evsel = perf_session__find_first_evtype(session, j);
		if (evsel == NULL)
		goto out;

		evlist__for_each(session->evlist, evsel) {
		if (evsel->attr.type != j)
		continue;

		attr = &evsel->attr;

		@@ -399,6 +400,8 @@ static int perf_session__check_output_opt(struct perf_session *session)
		output[j].fields \|= PERF_OUTPUT_SYM;
		output[j].fields \|= PERF_OUTPUT_DSO;
		set_print_ip_opts(attr);
		goto out;
		}
		}
		}