Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 42c4fb77 authored by Ingo Molnar's avatar Ingo Molnar
Browse files

Merge tag 'perf-core-for-mingo-20160530' of...

Merge tag 'perf-core-for-mingo-20160530' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux

 into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

User visible/kernel ABI changes:

- Per event callchain limit: Recently we introduced a sysctl to tune the
  max-stack for all events for which callchains were requested:

  $ sysctl kernel.perf_event_max_stack
  kernel.perf_event_max_stack = 127

  Now this patch introduces a way to configure this per event, i.e. this
  becomes possible:

  $ perf record -e sched:*/max-stack=2/ -e block:*/max-stack=10/ -a

  allowing finer tuning of how much buffer space callchains use.

  This uses an u16 from the reserved space at the end, leaving another
  u16 for future use.

  There has been interest in even finer tuning, namely to control the
  max stack for kernel and userspace callchains separately. Further
  discussion is needed, we may for instance use the remaining u16 for
  that and when it is present, assume that the sample_max_stack introduced
  in this patch applies for the kernel, and the u16 left is used for
  limiting the userspace callchain. (Arnaldo Carvalho de Melo)

Infrastructure changes:

- Adopt get_main_thread from db-export.c (Andi Kleen)

- More prep work for backward ring buffer support (Wang Nan)

- Prep work for supporting SDT (Statically Defined Tracing)
  tracepoints (Masami Hiramatsu)

- Add arch/*/include/generated/ to .gitignore (Taeung Song)

Signed-off-by: default avatarArnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
parents 71146051 01412261
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -1076,7 +1076,7 @@ extern void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct
extern struct perf_callchain_entry *
get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
		   u32 max_stack, bool crosstask, bool add_mark);
extern int get_callchain_buffers(void);
extern int get_callchain_buffers(int max_stack);
extern void put_callchain_buffers(void);

extern int sysctl_perf_event_max_stack;
+5 −1
Original line number Diff line number Diff line
@@ -276,6 +276,9 @@ enum perf_event_read_format {

/*
 * Hardware event_id to monitor via a performance monitoring event:
 *
 * @sample_max_stack: Max number of frame pointers in a callchain,
 *		      should be < /proc/sys/kernel/perf_event_max_stack
 */
struct perf_event_attr {

@@ -385,7 +388,8 @@ struct perf_event_attr {
	 * Wakeup watermark for AUX area
	 */
	__u32	aux_watermark;
	__u32	__reserved_2;	/* align to __u64 */
	__u16	sample_max_stack;
	__u16	__reserved_2;	/* align to __u64 */
};

#define perf_flags(attr)	(*(&(attr)->read_format + 1))
+1 −1
Original line number Diff line number Diff line
@@ -99,7 +99,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
	if (err)
		goto free_smap;

	err = get_callchain_buffers();
	err = get_callchain_buffers(sysctl_perf_event_max_stack);
	if (err)
		goto free_smap;

+12 −2
Original line number Diff line number Diff line
@@ -104,7 +104,7 @@ static int alloc_callchain_buffers(void)
	return -ENOMEM;
}

int get_callchain_buffers(void)
int get_callchain_buffers(int event_max_stack)
{
	int err = 0;
	int count;
@@ -121,6 +121,15 @@ int get_callchain_buffers(void)
		/* If the allocation failed, give up */
		if (!callchain_cpus_entries)
			err = -ENOMEM;
		/*
		 * If requesting per event more than the global cap,
		 * return a different error to help userspace figure
		 * this out.
		 *
		 * And also do it here so that we have &callchain_mutex held.
		 */
		if (event_max_stack > sysctl_perf_event_max_stack)
			err = -EOVERFLOW;
		goto exit;
	}

@@ -174,11 +183,12 @@ perf_callchain(struct perf_event *event, struct pt_regs *regs)
	bool user   = !event->attr.exclude_callchain_user;
	/* Disallow cross-task user callchains. */
	bool crosstask = event->ctx->task && event->ctx->task != current;
	const u32 max_stack = event->attr.sample_max_stack;

	if (!kernel && !user)
		return NULL;

	return get_perf_callchain(regs, 0, kernel, user, sysctl_perf_event_max_stack, crosstask, true);
	return get_perf_callchain(regs, 0, kernel, user, max_stack, crosstask, true);
}

struct perf_callchain_entry *
+4 −1
Original line number Diff line number Diff line
@@ -8843,7 +8843,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,

	if (!event->parent) {
		if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) {
			err = get_callchain_buffers();
			err = get_callchain_buffers(attr->sample_max_stack);
			if (err)
				goto err_addr_filters;
		}
@@ -9165,6 +9165,9 @@ SYSCALL_DEFINE5(perf_event_open,
			return -EINVAL;
	}

	if (!attr.sample_max_stack)
		attr.sample_max_stack = sysctl_perf_event_max_stack;

	/*
	 * In cgroup mode, the pid argument is used to pass the fd
	 * opened to the cgroup directory in cgroupfs. The cpu argument
Loading