Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit a4d71093 authored by Ingo Molnar's avatar Ingo Molnar
Browse files

Merge tag 'perf-core-for-mingo' of...

Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux

 into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

User visible changes:

- Enhance the error reporting of tracepoint event parsing, e.g.:

    $ oldperf record -e sched:sched_switc usleep 1
    event syntax error: 'sched:sched_switc'
                         \___ unknown tracepoint
    Run 'perf list' for a list of valid events

  Now we get the much nicer:

    $ perf record -e sched:sched_switc ls
    event syntax error: 'sched:sched_switc'
                         \___ can't access trace events

    Error: No permissions to read /sys/kernel/debug/tracing/events/sched/sched_switc
    Hint:  Try 'sudo mount -o remount,mode=755 /sys/kernel/debug'

  And after we have those mount point permissions fixed:

    $ perf record -e sched:sched_switc ls
    event syntax error: 'sched:sched_switc'
                         \___ unknown tracepoint

    Error: File /sys/kernel/debug/tracing/events/sched/sched_switc not found.
    Hint:  Perhaps this kernel misses some CONFIG_ setting to enable this feature?.

  Now it's just a matter of using what git uses to suggest alternatives when we
  make a typo, i.e. that it is just an 'h' missing :-)

  I.e. basically the event parsing routine now uses the strerror_open()
  routines introduced by, and used in, the 'perf trace' work. (Jiri Olsa)

Infrastructure changes:

- Export init/exit_probe_symbol_maps() from 'perf probe' for use in eBPF.
  (Namhyung Kim)

- Free perf_probe_event in cleanup_perf_probe_events(). (Namhyung Kim)

- regs_query_register_offset() infrastructure + implementation for x86.
  First user will be the perf/eBPF code. (Wang Nan)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parents 9059b284 bbbe6bf6
Loading
Loading
Loading
Loading
+49 −0
Original line number Diff line number Diff line
#ifndef __TOOLS_LINUX_ERR_H
#define __TOOLS_LINUX_ERR_H

#include <linux/compiler.h>
#include <linux/types.h>

#include <asm/errno.h>

/*
 * Original kernel header comment:
 *
 * Kernel pointers have redundant information, so we can use a
 * scheme where we can return either an error code or a normal
 * pointer with the same return value.
 *
 * This should be a per-architecture thing, to allow different
 * error and pointer decisions.
 *
 * Userspace note:
 * The same principle works for userspace, because 'error' pointers
 * fall down to the unused hole far from user space, as described
 * in Documentation/x86/x86_64/mm.txt for x86_64 arch:
 *
 * 0000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm hole caused by [48:63] sign extension
 * ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
 *
 * It should be the same case for other architectures, because
 * this code is used in generic kernel code.
 */
#define MAX_ERRNO	4095

#define IS_ERR_VALUE(x) unlikely((x) >= (unsigned long)-MAX_ERRNO)

/*
 * ERR_PTR() - encode an error code as a pointer value.
 * @error: the error code to carry, typically a negative errno.
 *
 * The value is converted with a plain cast; nothing is allocated and nothing
 * is validated here. Use IS_ERR() to recognize such a pointer and PTR_ERR()
 * to read the code back.
 */
static inline void * __must_check ERR_PTR(long error)
{
	void *encoded = (void *) error;

	return encoded;
}

/*
 * PTR_ERR() - read a pointer value back as a long.
 * @ptr: the pointer to convert.
 *
 * This is the inverse cast of ERR_PTR(). It performs no check of its own, so
 * the result is only an error code when IS_ERR(@ptr) holds.
 */
static inline long __must_check PTR_ERR(__force const void *ptr)
{
	long code = (long) ptr;

	return code;
}

/*
 * IS_ERR() - tell whether a pointer value lies in the error range.
 * @ptr: the pointer to test.
 *
 * Returns true when the pointer, viewed as an unsigned long, is greater than
 * or equal to (unsigned long)-MAX_ERRNO, as tested by IS_ERR_VALUE().
 * Note that a NULL pointer is not in that range.
 */
static inline bool __must_check IS_ERR(__force const void *ptr)
{
	unsigned long addr = (unsigned long)ptr;

	return IS_ERR_VALUE(addr);
}

#endif /* __TOOLS_LINUX_ERR_H */
+1 −0
Original line number Diff line number Diff line
@@ -2,3 +2,4 @@ ifndef NO_DWARF
PERF_HAVE_DWARF_REGS := 1
endif
HAVE_KVM_STAT_SUPPORT := 1
PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1
+88 −34
Original line number Diff line number Diff line
@@ -21,55 +21,109 @@
 */

#include <stddef.h>
#include <errno.h> /* for EINVAL */
#include <string.h> /* for strcmp */
#include <linux/ptrace.h> /* for struct pt_regs */
#include <linux/kernel.h> /* for offsetof */
#include <dwarf-regs.h>

/*
 * Generic dwarf analysis helpers
 * See arch/x86/kernel/ptrace.c.
 * Different from it:
 *
 *  - Since struct pt_regs is defined differently for user and kernel,
 *    but we want to use 'ax, bx' instead of 'rax, rbx' (which is struct
 *    field name of user's pt_regs), we make REG_OFFSET_NAME to accept
 *    both string name and reg field name.
 *
 *  - Since accessing x86_32's pt_regs from x86_64 building is difficult
 *    and vice versa, we simply fill offset with -1, so
 *    get_arch_regstr() still works but regs_query_register_offset()
 *    returns error.
 *    The only inconvenience caused by it now is that we are not allowed
 *    to generate BPF prologue for a x86_64 kernel if perf is built for
 *    x86_32. This is really a rare usecase.
 *
 *  - Order is different from kernel's ptrace.c for get_arch_regstr(). Use
 *    the order defined by dwarf.
 */

#define X86_32_MAX_REGS 8
const char *x86_32_regs_table[X86_32_MAX_REGS] = {
	"%ax",
	"%cx",
	"%dx",
	"%bx",
	"$stack",	/* Stack address instead of %sp */
	"%bp",
	"%si",
	"%di",
struct pt_regs_offset {
	const char *name;
	int offset;
};

#define REG_OFFSET_END {.name = NULL, .offset = 0}

#ifdef __x86_64__
# define REG_OFFSET_NAME_64(n, r) {.name = n, .offset = offsetof(struct pt_regs, r)}
# define REG_OFFSET_NAME_32(n, r) {.name = n, .offset = -1}
#else
# define REG_OFFSET_NAME_64(n, r) {.name = n, .offset = -1}
# define REG_OFFSET_NAME_32(n, r) {.name = n, .offset = offsetof(struct pt_regs, r)}
#endif

static const struct pt_regs_offset x86_32_regoffset_table[] = {
	REG_OFFSET_NAME_32("%ax",	eax),
	REG_OFFSET_NAME_32("%cx",	ecx),
	REG_OFFSET_NAME_32("%dx",	edx),
	REG_OFFSET_NAME_32("%bx",	ebx),
	REG_OFFSET_NAME_32("$stack",	esp),	/* Stack address instead of %sp */
	REG_OFFSET_NAME_32("%bp",	ebp),
	REG_OFFSET_NAME_32("%si",	esi),
	REG_OFFSET_NAME_32("%di",	edi),
	REG_OFFSET_END,
};

#define X86_64_MAX_REGS 16
const char *x86_64_regs_table[X86_64_MAX_REGS] = {
	"%ax",
	"%dx",
	"%cx",
	"%bx",
	"%si",
	"%di",
	"%bp",
	"%sp",
	"%r8",
	"%r9",
	"%r10",
	"%r11",
	"%r12",
	"%r13",
	"%r14",
	"%r15",
static const struct pt_regs_offset x86_64_regoffset_table[] = {
	REG_OFFSET_NAME_64("%ax",	rax),
	REG_OFFSET_NAME_64("%dx",	rdx),
	REG_OFFSET_NAME_64("%cx",	rcx),
	REG_OFFSET_NAME_64("%bx",	rbx),
	REG_OFFSET_NAME_64("%si",	rsi),
	REG_OFFSET_NAME_64("%di",	rdi),
	REG_OFFSET_NAME_64("%bp",	rbp),
	REG_OFFSET_NAME_64("%sp",	rsp),
	REG_OFFSET_NAME_64("%r8",	r8),
	REG_OFFSET_NAME_64("%r9",	r9),
	REG_OFFSET_NAME_64("%r10",	r10),
	REG_OFFSET_NAME_64("%r11",	r11),
	REG_OFFSET_NAME_64("%r12",	r12),
	REG_OFFSET_NAME_64("%r13",	r13),
	REG_OFFSET_NAME_64("%r14",	r14),
	REG_OFFSET_NAME_64("%r15",	r15),
	REG_OFFSET_END,
};

/* TODO: switching by dwarf address size */
#ifdef __x86_64__
#define ARCH_MAX_REGS X86_64_MAX_REGS
#define arch_regs_table x86_64_regs_table
#define regoffset_table x86_64_regoffset_table
#else
#define ARCH_MAX_REGS X86_32_MAX_REGS
#define arch_regs_table x86_32_regs_table
#define regoffset_table x86_32_regoffset_table
#endif

/* Minus 1 for the ending REG_OFFSET_END */
#define ARCH_MAX_REGS ((sizeof(regoffset_table) / sizeof(regoffset_table[0])) - 1)

/*
 * Return architecture dependent register string (for kprobe-tracer).
 *
 * @n is used as an index into regoffset_table, whose entries follow the
 * order defined by dwarf. Any index at or beyond ARCH_MAX_REGS (the table
 * size minus its terminating REG_OFFSET_END entry) yields NULL.
 */
const char *get_arch_regstr(unsigned int n)
{
	return (n < ARCH_MAX_REGS) ? regoffset_table[n].name : NULL;
}

/* Reuse code from arch/x86/kernel/ptrace.c */
/**
 * regs_query_register_offset() - query register offset from its name
 * @name:	the name of a register
 *
 * regs_query_register_offset() walks regoffset_table, which ends at the
 * entry whose name is NULL, and returns the offset stored for the first
 * entry whose name matches @name exactly. If @name is NULL or is not found
 * in the table, this returns -EINVAL.
 */
int regs_query_register_offset(const char *name)
{
	const struct pt_regs_offset *roff;

	/* strcmp() must never be handed a NULL pointer. */
	if (name == NULL)
		return -EINVAL;

	for (roff = regoffset_table; roff->name != NULL; roff++) {
		if (!strcmp(roff->name, name))
			return roff->offset;
	}

	return -EINVAL;
}
+5 −0
Original line number Diff line number Diff line
@@ -317,6 +317,10 @@ static int perf_add_probe_events(struct perf_probe_event *pevs, int npevs)
	int i, k;
	const char *event = NULL, *group = NULL;

	ret = init_probe_symbol_maps(pevs->uprobes);
	if (ret < 0)
		return ret;

	ret = convert_perf_probe_events(pevs, npevs);
	if (ret < 0)
		goto out_cleanup;
@@ -354,6 +358,7 @@ static int perf_add_probe_events(struct perf_probe_event *pevs, int npevs)

out_cleanup:
	cleanup_perf_probe_events(pevs, npevs);
	exit_probe_symbol_maps();
	return ret;
}

+11 −8
Original line number Diff line number Diff line
@@ -38,6 +38,7 @@
#include <stdlib.h>
#include <sys/mman.h>
#include <linux/futex.h>
#include <linux/err.h>

/* For older distros: */
#ifndef MAP_STACK
@@ -245,13 +246,14 @@ static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void
	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

	/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
	if (evsel == NULL)
	if (IS_ERR(evsel))
		evsel = perf_evsel__newtp("syscalls", direction);

	if (evsel) {
	if (IS_ERR(evsel))
		return NULL;

	if (perf_evsel__init_syscall_tp(evsel, handler))
		goto out_delete;
	}

	return evsel;

@@ -1705,12 +1707,12 @@ static int trace__read_syscall_info(struct trace *trace, int id)
	snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
	sc->tp_format = trace_event__tp_format("syscalls", tp_name);

	if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
	if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
		snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
		sc->tp_format = trace_event__tp_format("syscalls", tp_name);
	}

	if (sc->tp_format == NULL)
	if (IS_ERR(sc->tp_format))
		return -1;

	sc->args = sc->tp_format->format.fields;
@@ -2390,7 +2392,8 @@ static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
	if (evsel == NULL)

	if (IS_ERR(evsel))
		return false;

	if (perf_evsel__field(evsel, "pathname") == NULL) {
Loading