Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit e4222673 authored by Hari Bathini, committed by Arnaldo Carvalho de Melo
Browse files

perf: Add PERF_RECORD_NAMESPACES to include namespaces related info



With the advent of container technologies like Docker, which depend on
namespaces for isolation, there is a need for tracing support for
namespaces. This patch introduces a new PERF_RECORD_NAMESPACES event for
recording namespace-related info. By recording info for every
namespace, it is left to userspace to decide what constitutes a
container and to trace containers by updating the perf tool accordingly.

Each namespace has a combination of device and inode numbers. Though
every namespace has the same device number currently, that may change in
the future to avoid the need for a namespace of namespaces. Considering
that possibility, record both device and inode numbers separately for
each namespace.

Signed-off-by: Hari Bathini <hbathini@linux.vnet.ibm.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexei Starovoitov <ast@fb.com>
Cc: Ananth N Mavinakayanahalli <ananth@linux.vnet.ibm.com>
Cc: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Sargun Dhillon <sargun@sargun.me>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/148891929686.25309.2827618988917007768.stgit@hbathini.in.ibm.com


Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
parent 3ef5b402
Loading
Loading
Loading
Loading
+2 −0
Original line number Original line Diff line number Diff line
@@ -1112,6 +1112,7 @@ extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks


extern void perf_event_exec(void);
extern void perf_event_exec(void);
extern void perf_event_comm(struct task_struct *tsk, bool exec);
extern void perf_event_comm(struct task_struct *tsk, bool exec);
extern void perf_event_namespaces(struct task_struct *tsk);
extern void perf_event_fork(struct task_struct *tsk);
extern void perf_event_fork(struct task_struct *tsk);


/* Callchains */
/* Callchains */
@@ -1315,6 +1316,7 @@ static inline int perf_unregister_guest_info_callbacks
static inline void perf_event_mmap(struct vm_area_struct *vma)		{ }
static inline void perf_event_mmap(struct vm_area_struct *vma)		{ }
static inline void perf_event_exec(void)				{ }
static inline void perf_event_exec(void)				{ }
static inline void perf_event_comm(struct task_struct *tsk, bool exec)	{ }
static inline void perf_event_comm(struct task_struct *tsk, bool exec)	{ }
static inline void perf_event_namespaces(struct task_struct *tsk)	{ }
static inline void perf_event_fork(struct task_struct *tsk)		{ }
static inline void perf_event_fork(struct task_struct *tsk)		{ }
static inline void perf_event_init(void)				{ }
static inline void perf_event_init(void)				{ }
static inline int  perf_swevent_get_recursion_context(void)		{ return -1; }
static inline int  perf_swevent_get_recursion_context(void)		{ return -1; }
+31 −1
Original line number Original line Diff line number Diff line
@@ -344,7 +344,8 @@ struct perf_event_attr {
				use_clockid    :  1, /* use @clockid for time fields */
				use_clockid    :  1, /* use @clockid for time fields */
				context_switch :  1, /* context switch data */
				context_switch :  1, /* context switch data */
				write_backward :  1, /* Write ring buffer from end to beginning */
				write_backward :  1, /* Write ring buffer from end to beginning */
				__reserved_1   : 36;
				namespaces     :  1, /* include namespaces data */
				__reserved_1   : 35;


	union {
	union {
		__u32		wakeup_events;	  /* wakeup every n events */
		__u32		wakeup_events;	  /* wakeup every n events */
@@ -610,6 +611,23 @@ struct perf_event_header {
	__u16	size;
	__u16	size;
};
};


/*
 * Identifies a single namespace by a (device, inode) pair.
 * PERF_RECORD_NAMESPACES carries an array of these, one per namespace
 * type.
 */
struct perf_ns_link_info {
	__u64	dev;	/* device number half of the namespace identity */
	__u64	ino;	/* inode number half of the namespace identity */
};

/*
 * Position of each namespace type within the perf_ns_link_info array of
 * a PERF_RECORD_NAMESPACES record. The values are spelled out explicitly
 * because the array is emitted in index order, so they determine where
 * each namespace's entry lands in the record.
 */
enum {
	NET_NS_INDEX		= 0,
	UTS_NS_INDEX		= 1,
	IPC_NS_INDEX		= 2,
	PID_NS_INDEX		= 3,
	USER_NS_INDEX		= 4,
	MNT_NS_INDEX		= 5,
	CGROUP_NS_INDEX		= 6,

	NR_NAMESPACES,		/* number of available namespaces */
};

enum perf_event_type {
enum perf_event_type {


	/*
	/*
@@ -862,6 +880,18 @@ enum perf_event_type {
	 */
	 */
	PERF_RECORD_SWITCH_CPU_WIDE		= 15,
	PERF_RECORD_SWITCH_CPU_WIDE		= 15,


	/*
	 * struct {
	 *	struct perf_event_header	header;
	 *	u32				pid;
	 *	u32				tid;
	 *	u64				nr_namespaces;
	 *	{ u64				dev, inode; } [nr_namespaces];
	 *	struct sample_id		sample_id;
	 * };
	 */
	PERF_RECORD_NAMESPACES			= 16,

	PERF_RECORD_MAX,			/* non-ABI */
	PERF_RECORD_MAX,			/* non-ABI */
};
};


+139 −0
Original line number Original line Diff line number Diff line
@@ -48,6 +48,8 @@
#include <linux/parser.h>
#include <linux/parser.h>
#include <linux/sched/clock.h>
#include <linux/sched/clock.h>
#include <linux/sched/mm.h>
#include <linux/sched/mm.h>
#include <linux/proc_ns.h>
#include <linux/mount.h>


#include "internal.h"
#include "internal.h"


@@ -379,6 +381,7 @@ static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);


static atomic_t nr_mmap_events __read_mostly;
static atomic_t nr_mmap_events __read_mostly;
static atomic_t nr_comm_events __read_mostly;
static atomic_t nr_comm_events __read_mostly;
static atomic_t nr_namespaces_events __read_mostly;
static atomic_t nr_task_events __read_mostly;
static atomic_t nr_task_events __read_mostly;
static atomic_t nr_freq_events __read_mostly;
static atomic_t nr_freq_events __read_mostly;
static atomic_t nr_switch_events __read_mostly;
static atomic_t nr_switch_events __read_mostly;
@@ -3991,6 +3994,8 @@ static void unaccount_event(struct perf_event *event)
		atomic_dec(&nr_mmap_events);
		atomic_dec(&nr_mmap_events);
	if (event->attr.comm)
	if (event->attr.comm)
		atomic_dec(&nr_comm_events);
		atomic_dec(&nr_comm_events);
	if (event->attr.namespaces)
		atomic_dec(&nr_namespaces_events);
	if (event->attr.task)
	if (event->attr.task)
		atomic_dec(&nr_task_events);
		atomic_dec(&nr_task_events);
	if (event->attr.freq)
	if (event->attr.freq)
@@ -6491,6 +6496,7 @@ static void perf_event_task(struct task_struct *task,
void perf_event_fork(struct task_struct *task)
void perf_event_fork(struct task_struct *task)
{
{
	perf_event_task(task, NULL, 1);
	perf_event_task(task, NULL, 1);
	perf_event_namespaces(task);
}
}


/*
/*
@@ -6592,6 +6598,132 @@ void perf_event_comm(struct task_struct *task, bool exec)
	perf_event_comm_event(&comm_event);
	perf_event_comm_event(&comm_event);
}
}


/*
 * namespaces tracking
 */

/*
 * In-kernel carrier for one PERF_RECORD_NAMESPACES record.
 *
 * Only event_id is copied to the output buffer; task stays kernel-side
 * and is used to derive the pid/tid values for each receiving event.
 */
struct perf_namespaces_event {
	/* task whose namespaces are being reported */
	struct task_struct		*task;

	/* record body, written out as-is by perf_event_namespaces_output() */
	struct {
		struct perf_event_header	header;

		/* filled in per receiving event at output time */
		u32				pid;
		u32				tid;
		/* number of entries in link_info[]; set to NR_NAMESPACES */
		u64				nr_namespaces;
		/* indexed by the *_NS_INDEX constants */
		struct perf_ns_link_info	link_info[NR_NAMESPACES];
	} event_id;
};

static int perf_event_namespaces_match(struct perf_event *event)
{
	return event->attr.namespaces;
}

static void perf_event_namespaces_output(struct perf_event *event,
					 void *data)
{
	struct perf_namespaces_event *namespaces_event = data;
	struct perf_output_handle handle;
	struct perf_sample_data sample;
	int ret;

	if (!perf_event_namespaces_match(event))
		return;

	perf_event_header__init_id(&namespaces_event->event_id.header,
				   &sample, event);
	ret = perf_output_begin(&handle, event,
				namespaces_event->event_id.header.size);
	if (ret)
		return;

	namespaces_event->event_id.pid = perf_event_pid(event,
							namespaces_event->task);
	namespaces_event->event_id.tid = perf_event_tid(event,
							namespaces_event->task);

	perf_output_put(&handle, namespaces_event->event_id);

	perf_event__output_id_sample(event, &handle, &sample);

	perf_output_end(&handle);
}

/*
 * Record the device and inode numbers identifying one of @task's
 * namespaces.
 *
 * @ns_link_info: slot to fill; left untouched when the lookup fails
 * @task:         task whose namespace is looked up
 * @ns_ops:       operations table selecting which namespace type to query
 */
static void perf_fill_ns_link_info(struct perf_ns_link_info *ns_link_info,
				   struct task_struct *task,
				   const struct proc_ns_operations *ns_ops)
{
	struct path ns_path;
	struct inode *ns_inode;
	void *error;

	/* A non-NULL return means the lookup failed and ns_path is not valid. */
	error = ns_get_path(&ns_path, task, ns_ops);
	if (error)
		return;

	ns_inode = ns_path.dentry->d_inode;
	ns_link_info->dev = new_encode_dev(ns_inode->i_sb->s_dev);
	ns_link_info->ino = ns_inode->i_ino;

	/*
	 * ns_get_path() hands back a referenced path; release it, otherwise
	 * every emitted record leaks a reference.
	 */
	path_put(&ns_path);
}

void perf_event_namespaces(struct task_struct *task)
{
	struct perf_namespaces_event namespaces_event;
	struct perf_ns_link_info *ns_link_info;

	if (!atomic_read(&nr_namespaces_events))
		return;

	namespaces_event = (struct perf_namespaces_event){
		.task	= task,
		.event_id  = {
			.header = {
				.type = PERF_RECORD_NAMESPACES,
				.misc = 0,
				.size = sizeof(namespaces_event.event_id),
			},
			/* .pid */
			/* .tid */
			.nr_namespaces = NR_NAMESPACES,
			/* .link_info[NR_NAMESPACES] */
		},
	};

	ns_link_info = namespaces_event.event_id.link_info;

	perf_fill_ns_link_info(&ns_link_info[MNT_NS_INDEX],
			       task, &mntns_operations);

#ifdef CONFIG_USER_NS
	perf_fill_ns_link_info(&ns_link_info[USER_NS_INDEX],
			       task, &userns_operations);
#endif
#ifdef CONFIG_NET_NS
	perf_fill_ns_link_info(&ns_link_info[NET_NS_INDEX],
			       task, &netns_operations);
#endif
#ifdef CONFIG_UTS_NS
	perf_fill_ns_link_info(&ns_link_info[UTS_NS_INDEX],
			       task, &utsns_operations);
#endif
#ifdef CONFIG_IPC_NS
	perf_fill_ns_link_info(&ns_link_info[IPC_NS_INDEX],
			       task, &ipcns_operations);
#endif
#ifdef CONFIG_PID_NS
	perf_fill_ns_link_info(&ns_link_info[PID_NS_INDEX],
			       task, &pidns_operations);
#endif
#ifdef CONFIG_CGROUPS
	perf_fill_ns_link_info(&ns_link_info[CGROUP_NS_INDEX],
			       task, &cgroupns_operations);
#endif

	perf_iterate_sb(perf_event_namespaces_output,
			&namespaces_event,
			NULL);
}

/*
/*
 * mmap tracking
 * mmap tracking
 */
 */
@@ -9146,6 +9278,8 @@ static void account_event(struct perf_event *event)
		atomic_inc(&nr_mmap_events);
		atomic_inc(&nr_mmap_events);
	if (event->attr.comm)
	if (event->attr.comm)
		atomic_inc(&nr_comm_events);
		atomic_inc(&nr_comm_events);
	if (event->attr.namespaces)
		atomic_inc(&nr_namespaces_events);
	if (event->attr.task)
	if (event->attr.task)
		atomic_inc(&nr_task_events);
		atomic_inc(&nr_task_events);
	if (event->attr.freq)
	if (event->attr.freq)
@@ -9691,6 +9825,11 @@ SYSCALL_DEFINE5(perf_event_open,
			return -EACCES;
			return -EACCES;
	}
	}


	if (attr.namespaces) {
		if (!capable(CAP_SYS_ADMIN))
			return -EACCES;
	}

	if (attr.freq) {
	if (attr.freq) {
		if (attr.sample_freq > sysctl_perf_event_sample_rate)
		if (attr.sample_freq > sysctl_perf_event_sample_rate)
			return -EINVAL;
			return -EINVAL;
+2 −0
Original line number Original line Diff line number Diff line
@@ -2352,6 +2352,8 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
		}
		}
	}
	}


	perf_event_namespaces(current);

bad_unshare_cleanup_cred:
bad_unshare_cleanup_cred:
	if (new_cred)
	if (new_cred)
		put_cred(new_cred);
		put_cred(new_cred);
+3 −0
Original line number Original line Diff line number Diff line
@@ -26,6 +26,7 @@
#include <linux/file.h>
#include <linux/file.h>
#include <linux/syscalls.h>
#include <linux/syscalls.h>
#include <linux/cgroup.h>
#include <linux/cgroup.h>
#include <linux/perf_event.h>


static struct kmem_cache *nsproxy_cachep;
static struct kmem_cache *nsproxy_cachep;


@@ -262,6 +263,8 @@ SYSCALL_DEFINE2(setns, int, fd, int, nstype)
		goto out;
		goto out;
	}
	}
	switch_task_namespaces(tsk, new_nsproxy);
	switch_task_namespaces(tsk, new_nsproxy);

	perf_event_namespaces(tsk);
out:
out:
	fput(file);
	fput(file);
	return err;
	return err;