Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 0793a61d authored by Thomas Gleixner, committed by Ingo Molnar
Browse files

performance counters: core code



Implement the core kernel bits of Performance Counters subsystem.

The Linux Performance Counter subsystem provides an abstraction of
performance counter hardware capabilities. It provides per task and per
CPU counters, and it provides event capabilities on top of those.

Performance counters are accessed via special file descriptors.
There's one file descriptor per virtual counter used.

The special file descriptor is opened via the perf_counter_open()
system call:

 int
 perf_counter_open(u32 hw_event_type,
                   u32 hw_event_period,
                   u32 record_type,
                   pid_t pid,
                   int cpu);

The syscall returns the new fd. The fd can be used via the normal
VFS system calls: read() can be used to read the counter, fcntl()
can be used to set the blocking mode, etc.

Multiple counters can be kept open at a time, and the counters
can be poll()ed.

See more details in Documentation/perf-counters.txt.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent b5aa97e8
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -25,6 +25,7 @@
#include <linux/kbd_kern.h>
#include <linux/proc_fs.h>
#include <linux/quotaops.h>
#include <linux/perf_counter.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/suspend.h>
@@ -244,6 +245,7 @@ static void sysrq_handle_showregs(int key, struct tty_struct *tty)
	struct pt_regs *regs = get_irq_regs();
	if (regs)
		show_regs(regs);
	perf_counter_print_debug();
}
static struct sysrq_key_op sysrq_showregs_op = {
	.handler	= sysrq_handle_showregs,
+171 −0
Original line number Diff line number Diff line
/*
 *  Performance counters:
 *
 *   Copyright(C) 2008, Thomas Gleixner <tglx@linutronix.de>
 *   Copyright(C) 2008, Red Hat, Inc., Ingo Molnar
 *
 *  Data type definitions, declarations, prototypes.
 *
 *  Started by: Thomas Gleixner and Ingo Molnar
 *
 *  For licensing details see kernel-base/COPYING
 */
#ifndef _LINUX_PERF_COUNTER_H
#define _LINUX_PERF_COUNTER_H

#include <asm/atomic.h>

#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/spinlock.h>

struct task_struct;

/*
 * Generalized hardware event types, used by the hw_event_type parameter
 * of the sys_perf_counter_open() syscall:
 *
 * (Descriptions follow the Kconfig help text: instructions executed,
 * cache misses suffered, branches mis-predicted, etc.)
 */
enum hw_event_types {
	PERF_COUNT_CYCLES,			/* CPU cycles */
	PERF_COUNT_INSTRUCTIONS,		/* instructions executed */
	PERF_COUNT_CACHE_REFERENCES,		/* cache accesses */
	PERF_COUNT_CACHE_MISSES,		/* cache misses suffered */
	PERF_COUNT_BRANCH_INSTRUCTIONS,		/* branch instructions */
	PERF_COUNT_BRANCH_MISSES,		/* branches mis-predicted */
	/*
	 * If this bit is set in the type, then trigger NMI sampling:
	 */
	PERF_COUNT_NMI			= (1 << 30),
};

/*
 * IRQ-notification data record type: selects what, if anything, the
 * kernel records into the counter's IRQ data buffer on overflow.
 *
 * NOTE(review): the exact payload produced for each record type is
 * defined by the core implementation, not visible in this header.
 */
enum perf_record_type {
	PERF_RECORD_SIMPLE,
	PERF_RECORD_IRQ,
	PERF_RECORD_GROUP,
};

/**
 * struct hw_perf_counter - performance counter hardware details
 *
 * Arch-specific state for driving one hardware counter. Field semantics
 * are owned by the architecture code; the notes below are inferred from
 * the names (NOTE(review): confirm against the arch implementation).
 */
struct hw_perf_counter {
	u64			config;		/* raw hw configuration value */
	unsigned long		config_base;	/* base of the config registers/MSRs */
	unsigned long		counter_base;	/* base of the count registers/MSRs */
	int			nmi;		/* use NMI sampling (see PERF_COUNT_NMI) */
	unsigned int		idx;		/* hw counter slot this counter occupies */
	u64			prev_count;	/* last observed raw hw count */
	s32			next_count;
	u64			irq_period;	/* events between sampling interrupts */
};

/*
 * Hardcoded buffer length limit for now, for IRQ-fed events:
 */
#define PERF_DATA_BUFLEN	2048

/**
 * struct perf_data - performance counter IRQ data sampling ...
 *
 * One buffer of IRQ-produced event records, consumed by read().
 */
struct perf_data {
	int			len;		/* bytes of valid data in data[] */
	int			rd_idx;		/* read cursor into data[] */
	int			overrun;	/* data lost to buffer exhaustion — NOTE(review): count or flag? confirm */
	u8			data[PERF_DATA_BUFLEN];	/* raw record bytes */
};

/**
 * struct perf_counter - performance counter kernel representation:
 *
 * One virtual counter, exposed to userspace as a file descriptor
 * returned by sys_perf_counter_open().
 */
struct perf_counter {
	struct list_head		list;		/* node in the owning context's counter list */
	int				active;		/* nonzero while counting on hw — NOTE(review): confirm */
#if BITS_PER_LONG == 64
	atomic64_t			count;		/* 64-bit event count */
#else
	atomic_t			count32[2];	/* event count split into two 32-bit halves */
#endif
	u64				__irq_period;

	struct hw_perf_counter		hw;		/* arch/hw state for this counter */

	struct perf_counter_context	*ctx;		/* owning (task or CPU) context */
	struct task_struct		*task;		/* monitored task, for per-task counters */

	/*
	 * Protect attach/detach:
	 */
	struct mutex			mutex;

	int				oncpu;		/* CPU the counter is currently running on */
	int				cpu;		/* CPU requested via the syscall's 'cpu' argument */

	/* NOTE(review): s32 here, but the syscall takes a u32 hw_event_type */
	s32				hw_event_type;
	enum perf_record_type		record_type;	/* IRQ record format requested */

	/* read() / irq related data */
	wait_queue_head_t		waitq;		/* read()ers / poll()ers sleep here */
	/* optional: for NMIs */
	int				wakeup_pending;
	struct perf_data		*irqdata;	/* buffer the irq side fills */
	struct perf_data		*usrdata;	/* buffer read() drains */
	struct perf_data		data[2];	/* backing store for the two pointers above — presumably swapped as a double buffer; confirm in core code */
};

/**
 * struct perf_counter_context - counter context structure
 *
 * Used as a container for task counters and CPU counters as well:
 * embedded in task_struct (per-task) and in perf_cpu_context (per-CPU).
 * Empty when CONFIG_PERF_COUNTERS is off, so it costs nothing then.
 */
struct perf_counter_context {
#ifdef CONFIG_PERF_COUNTERS
	/*
	 * Protect the list of counters:
	 */
	spinlock_t		lock;
	struct list_head	counters;	/* all counters attached to this context */
	int			nr_counters;	/* total counters on the list */
	int			nr_active;	/* NOTE(review): presumably counters currently scheduled on hw — confirm */
	struct task_struct	*task;		/* owning task — NOTE(review): presumably NULL for per-CPU contexts */
#endif
};

/**
 * struct perf_cpu_context - per cpu counter context structure
 *
 * (Fixed the kernel-doc name: the comment previously said
 * "perf_counter_cpu_context", which does not match the struct.)
 */
struct perf_cpu_context {
	struct perf_counter_context	ctx;		/* the CPU's own counters */
	struct perf_counter_context	*task_ctx;	/* context of the task on this CPU — NOTE(review): confirm */
	int				active_oncpu;	/* counters live on the hw PMU — NOTE(review): confirm */
	int				max_pertask;	/* NOTE(review): presumably a cap on per-task counters here */
};

/*
 * Set by architecture code:
 */
extern int perf_max_counters;	/* number of hw counters the arch provides */

#ifdef CONFIG_PERF_COUNTERS
/* Hook points called from core kernel code (named for scheduler events): */
extern void perf_counter_task_sched_in(struct task_struct *task, int cpu);
extern void perf_counter_task_sched_out(struct task_struct *task, int cpu);
extern void perf_counter_task_tick(struct task_struct *task, int cpu);
extern void perf_counter_init_task(struct task_struct *task);
extern void perf_counter_notify(struct pt_regs *regs);
/* Dump hw counter state; wired into the sysrq show-regs handler. */
extern void perf_counter_print_debug(void);
#else
/*
 * !CONFIG_PERF_COUNTERS: empty inline stubs, so call sites need no
 * #ifdefs and the calls compile away entirely.
 */
static inline void
perf_counter_task_sched_in(struct task_struct *task, int cpu)		{ }
static inline void
perf_counter_task_sched_out(struct task_struct *task, int cpu)		{ }
static inline void
perf_counter_task_tick(struct task_struct *task, int cpu)		{ }
static inline void perf_counter_init_task(struct task_struct *task)	{ }
static inline void perf_counter_notify(struct pt_regs *regs)		{ }
static inline void perf_counter_print_debug(void)			{ }
#endif

#endif /* _LINUX_PERF_COUNTER_H */
+9 −0
Original line number Diff line number Diff line
@@ -71,6 +71,7 @@ struct sched_param {
#include <linux/fs_struct.h>
#include <linux/compiler.h>
#include <linux/completion.h>
#include <linux/perf_counter.h>
#include <linux/pid.h>
#include <linux/percpu.h>
#include <linux/topology.h>
@@ -1326,6 +1327,7 @@ struct task_struct {
	struct list_head pi_state_list;
	struct futex_pi_state *pi_state_cache;
#endif
	struct perf_counter_context perf_counter_ctx;
#ifdef CONFIG_NUMA
	struct mempolicy *mempolicy;
	short il_next;
@@ -2285,6 +2287,13 @@ static inline void inc_syscw(struct task_struct *tsk)
#define TASK_SIZE_OF(tsk)	TASK_SIZE
#endif

/*
 * Call the function if the target task is executing on a CPU right now:
 */
extern void task_oncpu_function_call(struct task_struct *p,
				     void (*func) (void *info), void *info);


#ifdef CONFIG_MM_OWNER
extern void mm_update_next_owner(struct mm_struct *mm);
extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p);
+6 −0
Original line number Diff line number Diff line
@@ -624,4 +624,10 @@ asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len);

int kernel_execve(const char *filename, char *const argv[], char *const envp[]);

/*
 * Open a performance counter and return its file descriptor.
 *
 * The fd supports the normal VFS calls: read() reads the counter,
 * fcntl() sets the blocking mode, and multiple fds can be poll()ed.
 */
asmlinkage int
sys_perf_counter_open(u32 hw_event_type,
		      u32 hw_event_period,
		      u32 record_type,
		      pid_t pid,
		      int cpu);
#endif
+29 −0
Original line number Diff line number Diff line
@@ -732,6 +732,35 @@ config AIO
          by some high performance threaded applications. Disabling
          this option saves about 7k.

config HAVE_PERF_COUNTERS
	bool

menu "Performance Counters"

config PERF_COUNTERS
	bool "Kernel Performance Counters"
	depends on HAVE_PERF_COUNTERS
	default y
	help
	  Enable kernel support for performance counter hardware.

	  Performance counters are special hardware registers available
	  on most modern CPUs. These registers count the number of certain
	  types of hw events, such as instructions executed, cache misses
	  suffered, or branches mis-predicted - without slowing down the
	  kernel or applications. These registers can also trigger interrupts
	  when a threshold number of events have passed - and can thus be
	  used to profile the code that runs on that CPU.

	  The Linux Performance Counter subsystem provides an abstraction of
	  these hardware capabilities, available via a system call. It
	  provides per task and per CPU counters, and it provides event
	  capabilities on top of those.

	  Say Y if unsure.

endmenu

config VM_EVENT_COUNTERS
	default y
	bool "Enable VM event counters for /proc/vmstat" if EMBEDDED
Loading