Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 14520d63 authored by Ingo Molnar's avatar Ingo Molnar
Browse files

Merge tag 'perf-core-for-mingo-20160908' of...

Merge tag 'perf-core-for-mingo-20160908' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

User visible changes:

- Add branch stack / basic block info to 'perf annotate --stdio', where for
  each branch, we add an asm comment after the instruction with information on
  how often it was taken and predicted. See example with color output at:

    http://vger.kernel.org/~acme/perf/annotate_basic_blocks.png



  (Peter Zijlstra)

- Only open an evsel in CPUs in its cpu map, fixing some use cases in
  systems with multiple PMUs with different CPU maps (Mark Rutland)

- Fix handling of huge TLB maps, recognizing it as anonymous (Wang Nan)

Infrastructure changes:

- Remove the symbol filtering code, i.e. the callbacks passed to all functions
  that could end up loading a DSO symtab, simplifying the code, eventually
  allowing what we should have had since day one: removing the 'map' parameter
  from dso__load() functions (Arnaldo Carvalho de Melo)

Arch specific build fixes:

- Fix detached tarball build on powerpc, where we were still accessing a
  file outside tools/ (Ravi Bangoria)

Signed-off-by: default avatarArnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
parents c0b172e5 25b8592e
Loading
Loading
Loading
Loading
+15 −0
Original line number Diff line number Diff line
@@ -34,6 +34,10 @@
#define TRACEFS_MAGIC          0x74726163
#endif

#ifndef HUGETLBFS_MAGIC
#define HUGETLBFS_MAGIC        0x958458f6
#endif

static const char * const sysfs__fs_known_mountpoints[] = {
	"/sys",
	0,
@@ -67,6 +71,10 @@ static const char * const tracefs__known_mountpoints[] = {
	0,
};

static const char * const hugetlbfs__known_mountpoints[] = {
	0,
};

struct fs {
	const char		*name;
	const char * const	*mounts;
@@ -80,6 +88,7 @@ enum {
	FS__PROCFS  = 1,
	FS__DEBUGFS = 2,
	FS__TRACEFS = 3,
	FS__HUGETLBFS = 4,
};

#ifndef TRACEFS_MAGIC
@@ -107,6 +116,11 @@ static struct fs fs__entries[] = {
		.mounts	= tracefs__known_mountpoints,
		.magic	= TRACEFS_MAGIC,
	},
	[FS__HUGETLBFS] = {
		.name	= "hugetlbfs",
		.mounts = hugetlbfs__known_mountpoints,
		.magic	= HUGETLBFS_MAGIC,
	},
};

static bool fs__read_mounts(struct fs *fs)
@@ -265,6 +279,7 @@ FS(sysfs, FS__SYSFS);
FS(procfs,  FS__PROCFS);
FS(debugfs, FS__DEBUGFS);
FS(tracefs, FS__TRACEFS);
FS(hugetlbfs, FS__HUGETLBFS);

int filename__read_int(const char *filename, int *value)
{
+1 −0
Original line number Diff line number Diff line
@@ -21,6 +21,7 @@ FS(sysfs)
FS(procfs)
FS(debugfs)
FS(tracefs)
FS(hugetlbfs)

#undef FS

+1 −1
Original line number Diff line number Diff line
@@ -108,7 +108,7 @@ void arch__post_process_probe_trace_events(struct perf_probe_event *pev,
	int i = 0;

	map = get_target_map(pev->target, pev->uprobes);
	if (!map || map__load(map, NULL) < 0)
	if (!map || map__load(map) < 0)
		return;

	for (i = 0; i < ntevs; i++) {
+104 −0
Original line number Diff line number Diff line
@@ -30,6 +30,7 @@
#include "util/tool.h"
#include "util/data.h"
#include "arch/common.h"
#include "util/block-range.h"

#include <dlfcn.h>
#include <linux/bitmap.h>
@@ -46,6 +47,103 @@ struct perf_annotate {
	DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
};

/*
 * Given one basic block:
 *
 *	from	to		branch_i
 *	* ----> *
 *		|
 *		| block
 *		v
 *		* ----> *
 *		from	to	branch_i+1
 *
 * where the horizontal are the branches and the vertical is the executed
 * block of instructions.
 *
 * We count, for each 'instruction', the number of blocks that covered it as
 * well as count the ratio each branch is taken.
 *
 * We can do this without knowing the actual instruction stream by keeping
 * track of the address ranges. We break down ranges such that there is no
 * overlap and iterate from the start until the end.
 *
 * @acme: once we parse the objdump output _before_ processing the samples,
 * we can easily fold the branch.cycles IPC bits in.
 */
static void process_basic_block(struct addr_map_symbol *start,
				struct addr_map_symbol *end,
				struct branch_flags *flags)
{
	struct symbol *sym = start->sym;
	struct annotation *notes = sym ? symbol__annotation(sym) : NULL;
	struct block_range_iter iter;
	struct block_range *entry;

	/*
	 * Sanity; NULL isn't executable and the CPU cannot execute backwards
	 */
	if (!start->addr || start->addr > end->addr)
		return;

	iter = block_range__create(start->addr, end->addr);
	if (!block_range_iter__valid(&iter))
		return;

	/*
	 * First block in range is a branch target.
	 */
	entry = block_range_iter(&iter);
	assert(entry->is_target);
	entry->entry++;

	do {
		entry = block_range_iter(&iter);

		entry->coverage++;
		entry->sym = sym;

		if (notes)
			notes->max_coverage = max(notes->max_coverage, entry->coverage);

	} while (block_range_iter__next(&iter));

	/*
	 * Last block in rage is a branch.
	 */
	entry = block_range_iter(&iter);
	assert(entry->is_branch);
	entry->taken++;
	if (flags->predicted)
		entry->pred++;
}

static void process_branch_stack(struct branch_stack *bs, struct addr_location *al,
				 struct perf_sample *sample)
{
	struct addr_map_symbol *prev = NULL;
	struct branch_info *bi;
	int i;

	if (!bs || !bs->nr)
		return;

	bi = sample__resolve_bstack(sample, al);
	if (!bi)
		return;

	for (i = bs->nr - 1; i >= 0; i--) {
		/*
		 * XXX filter against symbol
		 */
		if (prev)
			process_basic_block(prev, &bi[i].from, &bi[i].flags);
		prev = &bi[i].to;
	}

	free(bi);
}

static int perf_evsel__add_sample(struct perf_evsel *evsel,
				  struct perf_sample *sample,
				  struct addr_location *al,
@@ -72,6 +170,12 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel,
		return 0;
	}

	/*
	 * XXX filtered samples can still have branch entires pointing into our
	 * symbol and are missed.
	 */
	process_branch_stack(sample->branch_stack, al, sample);

	sample->period = 1;
	sample->weight = 1;

+1 −1
Original line number Diff line number Diff line
@@ -429,7 +429,7 @@ static int perf_event__inject_buildid(struct perf_tool *tool,
	if (al.map != NULL) {
		if (!al.map->dso->hit) {
			al.map->dso->hit = 1;
			if (map__load(al.map, NULL) >= 0) {
			if (map__load(al.map) >= 0) {
				dso__inject_build_id(al.map->dso, tool, machine);
				/*
				 * If this fails, too bad, let the other side
Loading