Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit ffa86c2f authored by Ingo Molnar
Browse files

Merge tag 'perf-core-for-mingo-4.12-20170314' of...

Merge tag 'perf-core-for-mingo-4.12-20170314' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

New features:

- Add PERF_RECORD_NAMESPACES so that the kernel can record information
  required to associate samples to namespaces, helping in container
  problem characterization.

  Now 'perf record' has a '--namespace' option to ask for such info,
  and when present, it can be used, initially, via a new sort order,
  'cgroup_id', allowing histogram entry bucketization by a (device, inode)
  based cgroup identifier (Hari Bathini)

- Add --next option to 'perf sched timehist', showing what is the next
  thread to run (Brendan Gregg)

Fixes:

- Fix segfault with basic block 'cycles' sort dimension (Changbin Du)

- Add c2c to command-list.txt, making it appear in the 'perf help'
  output (Changbin Du)

- Fix zeroing of 'abs_path' variable in the perf hists browser switch
  file code (Changbin Du)

- Hide tips messages when -q/--quiet is given to 'perf report' (Namhyung Kim)

Infrastructure changes:

- Use ref_reloc_sym + offset to setup kretprobes (Naveen Rao)

- Ignore generated files pmu-events/{jevents,pmu-events.c} for git (Changbin Du)

Documentation changes:

- Document +field style argument support for --field option (Changbin Du)

- Clarify 'perf c2c --stats' help message (Namhyung Kim)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parents 84e5b549 5f6bee34
Loading
Loading
Loading
Loading
+3 −2
Original line number Diff line number Diff line
@@ -8,8 +8,9 @@ Overview
--------
These events are similar to tracepoint based events. Instead of Tracepoint,
this is based on kprobes (kprobe and kretprobe). So it can probe wherever
kprobes can probe (this means, all functions body except for __kprobes
functions). Unlike the Tracepoint based event, this can be added and removed
kprobes can probe (this means, all functions except those with
__kprobes/nokprobe_inline annotation and those marked NOKPROBE_SYMBOL).
Unlike the Tracepoint based event, this can be added and removed
dynamically, on the fly.

To enable this feature, build your kernel with CONFIG_KPROBE_EVENTS=y.
+2 −0
Original line number Diff line number Diff line
@@ -1112,6 +1112,7 @@ extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks

extern void perf_event_exec(void);
extern void perf_event_comm(struct task_struct *tsk, bool exec);
extern void perf_event_namespaces(struct task_struct *tsk);
extern void perf_event_fork(struct task_struct *tsk);

/* Callchains */
@@ -1315,6 +1316,7 @@ static inline int perf_unregister_guest_info_callbacks
/*
 * Empty inline stand-ins for the perf hooks: each one does nothing (or
 * returns a fixed value) so callers need no conditional compilation.
 * NOTE(review): presumably this is the branch used when perf events are
 * compiled out -- the enclosing #ifdef is outside this hunk, confirm.
 */
static inline void perf_event_mmap(struct vm_area_struct *vma)		{ }
static inline void perf_event_exec(void)				{ }
static inline void perf_event_comm(struct task_struct *tsk, bool exec)	{ }
static inline void perf_event_namespaces(struct task_struct *tsk)	{ }
static inline void perf_event_fork(struct task_struct *tsk)		{ }
static inline void perf_event_init(void)				{ }
static inline int  perf_swevent_get_recursion_context(void)		{ return -1; }
+31 −1
Original line number Diff line number Diff line
@@ -344,7 +344,8 @@ struct perf_event_attr {
				use_clockid    :  1, /* use @clockid for time fields */
				context_switch :  1, /* context switch data */
				write_backward :  1, /* Write ring buffer from end to beginning */
				__reserved_1   : 36;
				namespaces     :  1, /* include namespaces data */
				__reserved_1   : 35;

	union {
		__u32		wakeup_events;	  /* wakeup every n events */
@@ -610,6 +611,23 @@ struct perf_event_header {
	__u16	size;
};

/*
 * Identifies one namespace by the (device, inode) pair of its namespace
 * file.  One entry per namespace type is carried in the
 * PERF_RECORD_NAMESPACES record.
 */
struct perf_ns_link_info {
	__u64	dev;	/* encoded device number of the namespace inode's superblock */
	__u64	ino;	/* inode number of the namespace inode */
};

/*
 * Slot of each namespace type inside the link_info[] array of a
 * PERF_RECORD_NAMESPACES record.  The values are spelled out explicitly
 * because they are part of the record layout seen by consumers.
 */
enum {
	NET_NS_INDEX		= 0,
	UTS_NS_INDEX		= 1,
	IPC_NS_INDEX		= 2,
	PID_NS_INDEX		= 3,
	USER_NS_INDEX		= 4,
	MNT_NS_INDEX		= 5,
	CGROUP_NS_INDEX		= 6,

	NR_NAMESPACES,		/* number of available namespaces */
};

enum perf_event_type {

	/*
@@ -862,6 +880,18 @@ enum perf_event_type {
	 */
	PERF_RECORD_SWITCH_CPU_WIDE		= 15,

	/*
	 * struct {
	 *	struct perf_event_header	header;
	 *	u32				pid;
	 *	u32				tid;
	 *	u64				nr_namespaces;
	 *	{ u64				dev, inode; } [nr_namespaces];
	 *	struct sample_id		sample_id;
	 * };
	 */
	PERF_RECORD_NAMESPACES			= 16,

	PERF_RECORD_MAX,			/* non-ABI */
};

+139 −0
Original line number Diff line number Diff line
@@ -48,6 +48,8 @@
#include <linux/parser.h>
#include <linux/sched/clock.h>
#include <linux/sched/mm.h>
#include <linux/proc_ns.h>
#include <linux/mount.h>

#include "internal.h"

@@ -379,6 +381,7 @@ static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);

static atomic_t nr_mmap_events __read_mostly;
static atomic_t nr_comm_events __read_mostly;
static atomic_t nr_namespaces_events __read_mostly;
static atomic_t nr_task_events __read_mostly;
static atomic_t nr_freq_events __read_mostly;
static atomic_t nr_switch_events __read_mostly;
@@ -3991,6 +3994,8 @@ static void unaccount_event(struct perf_event *event)
		atomic_dec(&nr_mmap_events);
	if (event->attr.comm)
		atomic_dec(&nr_comm_events);
	if (event->attr.namespaces)
		atomic_dec(&nr_namespaces_events);
	if (event->attr.task)
		atomic_dec(&nr_task_events);
	if (event->attr.freq)
@@ -6491,6 +6496,7 @@ static void perf_event_task(struct task_struct *task,
/*
 * Fork-time perf notification for @task: emit the task event through
 * perf_event_task() and then a namespaces record through
 * perf_event_namespaces(), so consumers learn the namespaces of the task
 * along with the task itself.
 */
void perf_event_fork(struct task_struct *task)
{
	perf_event_task(task, NULL, 1);
	perf_event_namespaces(task);
}

/*
@@ -6592,6 +6598,132 @@ void perf_event_comm(struct task_struct *task, bool exec)
	perf_event_comm_event(&comm_event);
}

/*
 * namespaces tracking
 */

/*
 * Working descriptor for one namespaces notification.  @task is the task
 * being described; @event_id is the record body that
 * perf_event_namespaces_output() copies into the ring buffer with
 * perf_output_put(), so its layout mirrors PERF_RECORD_NAMESPACES.
 */
struct perf_namespaces_event {
	struct task_struct		*task;

	struct {
		struct perf_event_header	header;

		/* pid/tid are filled in per event by the output callback */
		u32				pid;
		u32				tid;
		u64				nr_namespaces;
		/* indexed by the *_NS_INDEX constants */
		struct perf_ns_link_info	link_info[NR_NAMESPACES];
	} event_id;
};

/*
 * Filter used by the namespaces output path: non-zero when @event was
 * opened with attr.namespaces set, zero otherwise.
 */
static int perf_event_namespaces_match(struct perf_event *event)
{
	return event->attr.namespaces != 0;
}

static void perf_event_namespaces_output(struct perf_event *event,
					 void *data)
{
	struct perf_namespaces_event *namespaces_event = data;
	struct perf_output_handle handle;
	struct perf_sample_data sample;
	int ret;

	if (!perf_event_namespaces_match(event))
		return;

	perf_event_header__init_id(&namespaces_event->event_id.header,
				   &sample, event);
	ret = perf_output_begin(&handle, event,
				namespaces_event->event_id.header.size);
	if (ret)
		return;

	namespaces_event->event_id.pid = perf_event_pid(event,
							namespaces_event->task);
	namespaces_event->event_id.tid = perf_event_tid(event,
							namespaces_event->task);

	perf_output_put(&handle, namespaces_event->event_id);

	perf_event__output_id_sample(event, &handle, &sample);

	perf_output_end(&handle);
}

/*
 * Fill @ns_link_info with the (device, inode) identity of the namespace of
 * @task selected by @ns_ops.
 *
 * If the namespace path cannot be obtained the entry is left untouched;
 * the caller zero-initializes the whole array, so such an entry reads as
 * dev == 0, ino == 0.
 */
static void perf_fill_ns_link_info(struct perf_ns_link_info *ns_link_info,
				   struct task_struct *task,
				   const struct proc_ns_operations *ns_ops)
{
	struct path ns_path;
	struct inode *ns_inode;
	void *error;

	error = ns_get_path(&ns_path, task, ns_ops);
	if (!error) {
		ns_inode = ns_path.dentry->d_inode;
		ns_link_info->dev = new_encode_dev(ns_inode->i_sb->s_dev);
		ns_link_info->ino = ns_inode->i_ino;
		/*
		 * ns_get_path() hands back a referenced path; drop that
		 * reference now that the identifiers have been copied out,
		 * otherwise every record leaks a mount/dentry reference.
		 */
		path_put(&ns_path);
	}
}

void perf_event_namespaces(struct task_struct *task)
{
	struct perf_namespaces_event namespaces_event;
	struct perf_ns_link_info *ns_link_info;

	if (!atomic_read(&nr_namespaces_events))
		return;

	namespaces_event = (struct perf_namespaces_event){
		.task	= task,
		.event_id  = {
			.header = {
				.type = PERF_RECORD_NAMESPACES,
				.misc = 0,
				.size = sizeof(namespaces_event.event_id),
			},
			/* .pid */
			/* .tid */
			.nr_namespaces = NR_NAMESPACES,
			/* .link_info[NR_NAMESPACES] */
		},
	};

	ns_link_info = namespaces_event.event_id.link_info;

	perf_fill_ns_link_info(&ns_link_info[MNT_NS_INDEX],
			       task, &mntns_operations);

#ifdef CONFIG_USER_NS
	perf_fill_ns_link_info(&ns_link_info[USER_NS_INDEX],
			       task, &userns_operations);
#endif
#ifdef CONFIG_NET_NS
	perf_fill_ns_link_info(&ns_link_info[NET_NS_INDEX],
			       task, &netns_operations);
#endif
#ifdef CONFIG_UTS_NS
	perf_fill_ns_link_info(&ns_link_info[UTS_NS_INDEX],
			       task, &utsns_operations);
#endif
#ifdef CONFIG_IPC_NS
	perf_fill_ns_link_info(&ns_link_info[IPC_NS_INDEX],
			       task, &ipcns_operations);
#endif
#ifdef CONFIG_PID_NS
	perf_fill_ns_link_info(&ns_link_info[PID_NS_INDEX],
			       task, &pidns_operations);
#endif
#ifdef CONFIG_CGROUPS
	perf_fill_ns_link_info(&ns_link_info[CGROUP_NS_INDEX],
			       task, &cgroupns_operations);
#endif

	perf_iterate_sb(perf_event_namespaces_output,
			&namespaces_event,
			NULL);
}

/*
 * mmap tracking
 */
@@ -9146,6 +9278,8 @@ static void account_event(struct perf_event *event)
		atomic_inc(&nr_mmap_events);
	if (event->attr.comm)
		atomic_inc(&nr_comm_events);
	if (event->attr.namespaces)
		atomic_inc(&nr_namespaces_events);
	if (event->attr.task)
		atomic_inc(&nr_task_events);
	if (event->attr.freq)
@@ -9691,6 +9825,11 @@ SYSCALL_DEFINE5(perf_event_open,
			return -EACCES;
	}

	if (attr.namespaces) {
		if (!capable(CAP_SYS_ADMIN))
			return -EACCES;
	}

	if (attr.freq) {
		if (attr.sample_freq > sysctl_perf_event_sample_rate)
			return -EINVAL;
+2 −0
Original line number Diff line number Diff line
@@ -2352,6 +2352,8 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
		}
	}

	perf_event_namespaces(current);

bad_unshare_cleanup_cred:
	if (new_cred)
		put_cred(new_cred);
Loading