Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 510457ec authored by Ingo Molnar's avatar Ingo Molnar
Browse files

Merge tag 'perf-core-for-mingo-4.13-20170718' of...

Merge tag 'perf-core-for-mingo-4.13-20170718' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux

 into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

User visible changes:

- Initial support for namespaces, using setns to access files in
  namespaces, grabbing their build-ids, etc. We still need to work
  more to deal with namespaces that vanish before we can get the
  needed data to do analysis, but this should be as good as what is
  in bcc now (Krister Johansen)

- Add header record types to pipe-mode, now this command:

  $ perf record -o - -e cycles sleep 1 | perf report --stdio --header

  Will show the same as in non-pipe mode, i.e. involving a perf.data
  file (David Carrillo-Cisneros)

- Implement a visual marker for fused x86 instructions in the annotate
  TUI browser, available now in 'perf report', more work needed to have
  it available as well in 'perf top' (Jin Yao)

  Further explanation from one of Jin's patches:

       │   ┌──cmpl   $0x0,argp_program_version_hook
 81.93 │   ├──je     20
       │   │  lock   cmpxchg %esi,0x38a9a4(%rip)
       │   │↓ jne    29
       │   │↓ jmp    43
 11.47 │20:└─→cmpxch %esi,0x38a999(%rip)

  That means the cmpl+je is a fused instruction pair and they should be
  considered together.

- Record the branch type and then show statistics and info about
  in callchain entries (Jin Yao)

  Example from one of Jin's patches:

  # perf record -g -j any,save_type
  # perf report --branch-history --stdio --no-children

  38.50%  div.c:45                [.] main                    div
          |
          ---main div.c:42 (RET CROSS_2M cycles:2)
             compute_flag div.c:28 (cycles:2)
             compute_flag div.c:27 (RET CROSS_2M cycles:1)
             rand rand.c:28 (cycles:1)
             rand rand.c:28 (RET CROSS_2M cycles:1)
             __random random.c:298 (cycles:1)
             __random random.c:297 (COND_BWD CROSS_2M cycles:1)
             __random random.c:295 (cycles:1)
             __random random.c:295 (COND_BWD CROSS_2M cycles:1)
             __random random.c:295 (cycles:1)
             __random random.c:295 (RET CROSS_2M cycles:9)

- Beautify the fcntl syscall, which is an interesting one in the sense
  that infrastructure had to be put in place to change the formatters of
  some arguments according to the value in a previous one, i.e. cmd
  dictates how arg and the syscall return will be formatted.
  (Arnaldo Carvalho de Melo

Infrastructure changes:

- 'perf test attr' fixes (Jiri Olsa)

Vendor events changes:

- Add POWER9 PMU events Sukadev (Bhattiprolu)

- Support additional POWER8+ PVR in PMU mapfile (Shriya)

Signed-off-by: default avatarArnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
parents 3bda69c1 b851dd49
Loading
Loading
Loading
Loading
+51 −1
Original line number Diff line number Diff line
@@ -109,6 +109,9 @@ enum {
	X86_BR_ZERO_CALL	= 1 << 15,/* zero length call */
	X86_BR_CALL_STACK	= 1 << 16,/* call stack */
	X86_BR_IND_JMP		= 1 << 17,/* indirect jump */

	X86_BR_TYPE_SAVE	= 1 << 18,/* indicate to save branch type */

};

#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
@@ -510,6 +513,7 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
		cpuc->lbr_entries[i].in_tx	= 0;
		cpuc->lbr_entries[i].abort	= 0;
		cpuc->lbr_entries[i].cycles	= 0;
		cpuc->lbr_entries[i].type	= 0;
		cpuc->lbr_entries[i].reserved	= 0;
	}
	cpuc->lbr_stack.nr = i;
@@ -596,6 +600,7 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
		cpuc->lbr_entries[out].in_tx	 = in_tx;
		cpuc->lbr_entries[out].abort	 = abort;
		cpuc->lbr_entries[out].cycles	 = cycles;
		cpuc->lbr_entries[out].type	 = 0;
		cpuc->lbr_entries[out].reserved	 = 0;
		out++;
	}
@@ -673,6 +678,10 @@ static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)

	if (br_type & PERF_SAMPLE_BRANCH_CALL)
		mask |= X86_BR_CALL | X86_BR_ZERO_CALL;

	if (br_type & PERF_SAMPLE_BRANCH_TYPE_SAVE)
		mask |= X86_BR_TYPE_SAVE;

	/*
	 * stash actual user request into reg, it may
	 * be used by fixup code for some CPU
@@ -926,6 +935,43 @@ static int branch_type(unsigned long from, unsigned long to, int abort)
	return ret;
}

#define X86_BR_TYPE_MAP_MAX	16

static int branch_map[X86_BR_TYPE_MAP_MAX] = {
	PERF_BR_CALL,		/* X86_BR_CALL */
	PERF_BR_RET,		/* X86_BR_RET */
	PERF_BR_SYSCALL,	/* X86_BR_SYSCALL */
	PERF_BR_SYSRET,		/* X86_BR_SYSRET */
	PERF_BR_UNKNOWN,	/* X86_BR_INT */
	PERF_BR_UNKNOWN,	/* X86_BR_IRET */
	PERF_BR_COND,		/* X86_BR_JCC */
	PERF_BR_UNCOND,		/* X86_BR_JMP */
	PERF_BR_UNKNOWN,	/* X86_BR_IRQ */
	PERF_BR_IND_CALL,	/* X86_BR_IND_CALL */
	PERF_BR_UNKNOWN,	/* X86_BR_ABORT */
	PERF_BR_UNKNOWN,	/* X86_BR_IN_TX */
	PERF_BR_UNKNOWN,	/* X86_BR_NO_TX */
	PERF_BR_CALL,		/* X86_BR_ZERO_CALL */
	PERF_BR_UNKNOWN,	/* X86_BR_CALL_STACK */
	PERF_BR_IND,		/* X86_BR_IND_JMP */
};

static int
common_branch_type(int type)
{
	int i;

	type >>= 2; /* skip X86_BR_USER and X86_BR_KERNEL */

	if (type) {
		i = __ffs(type);
		if (i < X86_BR_TYPE_MAP_MAX)
			return branch_map[i];
	}

	return PERF_BR_UNKNOWN;
}

/*
 * implement actual branch filter based on user demand.
 * Hardware may not exactly satisfy that request, thus
@@ -942,7 +988,8 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
	bool compress = false;

	/* if sampling all branches, then nothing to filter */
	if ((br_sel & X86_BR_ALL) == X86_BR_ALL)
	if (((br_sel & X86_BR_ALL) == X86_BR_ALL) &&
	    ((br_sel & X86_BR_TYPE_SAVE) != X86_BR_TYPE_SAVE))
		return;

	for (i = 0; i < cpuc->lbr_stack.nr; i++) {
@@ -963,6 +1010,9 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
			cpuc->lbr_entries[i].from = 0;
			compress = true;
		}

		if ((br_sel & X86_BR_TYPE_SAVE) == X86_BR_TYPE_SAVE)
			cpuc->lbr_entries[i].type = common_branch_type(type);
	}

	if (!compress)
+26 −1
Original line number Diff line number Diff line
@@ -174,6 +174,8 @@ enum perf_branch_sample_type_shift {
	PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT	= 14, /* no flags */
	PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT	= 15, /* no cycles */

	PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT	= 16, /* save branch type */

	PERF_SAMPLE_BRANCH_MAX_SHIFT		/* non-ABI */
};

@@ -198,9 +200,30 @@ enum perf_branch_sample_type {
	PERF_SAMPLE_BRANCH_NO_FLAGS	= 1U << PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT,
	PERF_SAMPLE_BRANCH_NO_CYCLES	= 1U << PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT,

	PERF_SAMPLE_BRANCH_TYPE_SAVE	=
		1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT,

	PERF_SAMPLE_BRANCH_MAX		= 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT,
};

/*
 * Common flow change classification
 */
enum {
	PERF_BR_UNKNOWN		= 0,	/* unknown */
	PERF_BR_COND		= 1,	/* conditional */
	PERF_BR_UNCOND		= 2,	/* unconditional  */
	PERF_BR_IND		= 3,	/* indirect */
	PERF_BR_CALL		= 4,	/* function call */
	PERF_BR_IND_CALL	= 5,	/* indirect function call */
	PERF_BR_RET		= 6,	/* function return */
	PERF_BR_SYSCALL		= 7,	/* syscall */
	PERF_BR_SYSRET		= 8,	/* syscall return */
	PERF_BR_COND_CALL	= 9,	/* conditional function call */
	PERF_BR_COND_RET	= 10,	/* conditional function return */
	PERF_BR_MAX,
};

#define PERF_SAMPLE_BRANCH_PLM_ALL \
	(PERF_SAMPLE_BRANCH_USER|\
	 PERF_SAMPLE_BRANCH_KERNEL|\
@@ -1015,6 +1038,7 @@ union perf_mem_data_src {
 *     in_tx: running in a hardware transaction
 *     abort: aborting a hardware transaction
 *    cycles: cycles from last branch (or 0 if not supported)
 *      type: branch type
 */
struct perf_branch_entry {
	__u64	from;
@@ -1024,7 +1048,8 @@ struct perf_branch_entry {
		in_tx:1,    /* in transaction */
		abort:1,    /* transaction abort */
		cycles:16,  /* cycle count to last branch */
		reserved:44;
		type:4,     /* branch type */
		reserved:40;
};

#endif /* _UAPI_LINUX_PERF_EVENT_H */
+3 −0
Original line number Diff line number Diff line
@@ -10,3 +10,6 @@
#ifndef __NR_getcpu
# define __NR_getcpu 318
#endif
#ifndef __NR_setns
# define __NR_setns 346
#endif
+3 −0
Original line number Diff line number Diff line
@@ -10,3 +10,6 @@
#ifndef __NR_getcpu
# define __NR_getcpu 309
#endif
#ifndef __NR_setns
#define __NR_setns 308
#endif
+17 −0
Original line number Diff line number Diff line
#ifndef _UAPI_ASM_X86_UNISTD_H
#define _UAPI_ASM_X86_UNISTD_H

/* x32 syscall flag bit */
#define __X32_SYSCALL_BIT	0x40000000

#ifndef __KERNEL__
# ifdef __i386__
#  include <asm/unistd_32.h>
# elif defined(__ILP32__)
#  include <asm/unistd_x32.h>
# else
#  include <asm/unistd_64.h>
# endif
#endif

#endif /* _UAPI_ASM_X86_UNISTD_H */
Loading