Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 9f66a381 authored by Ingo Molnar
Browse files

perf counters: restructure the API



Impact: clean up new API

Thorough cleanup of the new perf counters API; we now get a clean separation
of the various concepts:

 - introduce perf_counter_hw_event to separate out the event source details

 - move special type flags into separate attributes: PERF_COUNT_NMI,
   PERF_COUNT_RAW

 - extend the type to u64 and reserve it fully to the architecture in the
   raw type case.

And make use of all these changes in the core and x86 perfcounters code.

Also change the syscall signature to:

  asmlinkage int sys_perf_counter_open(

	struct perf_counter_hw_event	*hw_event_uptr		__user,
	pid_t				pid,
	int				cpu,
	int				group_fd);

( Note that group_fd is unused for now - it's reserved for the counter
  groups abstraction. )

Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent dfa7c899
Loading
Loading
Loading
Loading
+14 −15
Original line number Original line Diff line number Diff line
@@ -58,8 +58,8 @@ const int max_intel_perfmon_events = ARRAY_SIZE(intel_perfmon_event_map);
 */
 */
int hw_perf_counter_init(struct perf_counter *counter)
int hw_perf_counter_init(struct perf_counter *counter)
{
{
	struct perf_counter_hw_event *hw_event = &counter->hw_event;
	struct hw_perf_counter *hwc = &counter->hw;
	struct hw_perf_counter *hwc = &counter->hw;
	u32 hw_event_type = counter->event.hw_event_type;


	if (unlikely(!perf_counters_initialized))
	if (unlikely(!perf_counters_initialized))
		return -EINVAL;
		return -EINVAL;
@@ -77,14 +77,14 @@ int hw_perf_counter_init(struct perf_counter *counter)
	hwc->nmi = 0;
	hwc->nmi = 0;
	if (capable(CAP_SYS_ADMIN)) {
	if (capable(CAP_SYS_ADMIN)) {
		hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
		hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
		if (hw_event_type & PERF_COUNT_NMI)
		if (hw_event->nmi)
			hwc->nmi = 1;
			hwc->nmi = 1;
	}
	}


	hwc->config_base	= MSR_ARCH_PERFMON_EVENTSEL0;
	hwc->config_base	= MSR_ARCH_PERFMON_EVENTSEL0;
	hwc->counter_base	= MSR_ARCH_PERFMON_PERFCTR0;
	hwc->counter_base	= MSR_ARCH_PERFMON_PERFCTR0;


	hwc->irq_period = counter->event.hw_event_period;
	hwc->irq_period		= hw_event->irq_period;
	/*
	/*
	 * Intel PMCs cannot be accessed sanely above 32 bit width,
	 * Intel PMCs cannot be accessed sanely above 32 bit width,
	 * so we install an artificial 1<<31 period regardless of
	 * so we install an artificial 1<<31 period regardless of
@@ -93,21 +93,20 @@ int hw_perf_counter_init(struct perf_counter *counter)
	if (!hwc->irq_period)
	if (!hwc->irq_period)
		hwc->irq_period = 0x7FFFFFFF;
		hwc->irq_period = 0x7FFFFFFF;


	hwc->next_count = -((s32) hwc->irq_period);
	hwc->next_count	= -(s32)hwc->irq_period;


	/*
	/*
	 * Raw event types provide the config in the event structure
	 * Raw event types provide the config in the event structure
	 */
	 */
	hw_event_type &= ~PERF_COUNT_NMI;
	if (hw_event->raw) {
	if (hw_event_type == PERF_COUNT_RAW) {
		hwc->config |= hw_event->type;
		hwc->config |= counter->event.hw_raw_ctrl;
	} else {
	} else {
		if (hw_event_type >= max_intel_perfmon_events)
		if (hw_event->type >= max_intel_perfmon_events)
			return -EINVAL;
			return -EINVAL;
		/*
		/*
		 * The generic map:
		 * The generic map:
		 */
		 */
		hwc->config |= intel_perfmon_event_map[hw_event_type];
		hwc->config |= intel_perfmon_event_map[hw_event->type];
	}
	}
	counter->wakeup_pending = 0;
	counter->wakeup_pending = 0;


@@ -354,7 +353,7 @@ perf_handle_group(struct perf_counter *leader, u64 *status, u64 *overflown)
	int bit;
	int bit;


	list_for_each_entry(counter, &ctx->counters, list) {
	list_for_each_entry(counter, &ctx->counters, list) {
		if (counter->record_type != PERF_RECORD_SIMPLE ||
		if (counter->hw_event.record_type != PERF_RECORD_SIMPLE ||
		    counter == leader)
		    counter == leader)
			continue;
			continue;


@@ -372,7 +371,7 @@ perf_handle_group(struct perf_counter *leader, u64 *status, u64 *overflown)
				perf_save_and_restart(counter);
				perf_save_and_restart(counter);
			}
			}
		}
		}
		perf_store_irq_data(leader, counter->event.hw_event_type);
		perf_store_irq_data(leader, counter->hw_event.type);
		perf_store_irq_data(leader, atomic64_counter_read(counter));
		perf_store_irq_data(leader, atomic64_counter_read(counter));
	}
	}
}
}
@@ -410,7 +409,7 @@ again:


		perf_save_and_restart(counter);
		perf_save_and_restart(counter);


		switch (counter->record_type) {
		switch (counter->hw_event.record_type) {
		case PERF_RECORD_SIMPLE:
		case PERF_RECORD_SIMPLE:
			continue;
			continue;
		case PERF_RECORD_IRQ:
		case PERF_RECORD_IRQ:
@@ -418,7 +417,7 @@ again:
			break;
			break;
		case PERF_RECORD_GROUP:
		case PERF_RECORD_GROUP:
			perf_store_irq_data(counter,
			perf_store_irq_data(counter,
					    counter->event.hw_event_type);
					    counter->hw_event.type);
			perf_store_irq_data(counter,
			perf_store_irq_data(counter,
					    atomic64_counter_read(counter));
					    atomic64_counter_read(counter));
			perf_handle_group(counter, &status, &ack);
			perf_handle_group(counter, &status, &ack);
+62 −36
Original line number Original line Diff line number Diff line
@@ -24,40 +24,68 @@
struct task_struct;
struct task_struct;


/*
/*
 * Generalized hardware event types, used by the hw_event_type parameter
 * User-space ABI bits:
 * of the sys_perf_counter_open() syscall:
 */

/*
 * Generalized performance counter event types, used by the hw_event.type
 * parameter of the sys_perf_counter_open() syscall:
 */
 */
enum hw_event_types {
enum hw_event_types {
	PERF_COUNT_CYCLES,
	PERF_COUNT_INSTRUCTIONS,
	PERF_COUNT_CACHE_REFERENCES,
	PERF_COUNT_CACHE_MISSES,
	PERF_COUNT_BRANCH_INSTRUCTIONS,
	PERF_COUNT_BRANCH_MISSES,
	/*
	/*
	 * If this bit is set in the type, then trigger NMI sampling:
	 * Common hardware events, generalized by the kernel:
	 */
	 */
	PERF_COUNT_NMI			= (1 << 30),
	PERF_COUNT_CYCLES		=  0,
	PERF_COUNT_RAW			= (1 << 31),
	PERF_COUNT_INSTRUCTIONS		=  1,
	PERF_COUNT_CACHE_REFERENCES	=  2,
	PERF_COUNT_CACHE_MISSES		=  3,
	PERF_COUNT_BRANCH_INSTRUCTIONS	=  4,
	PERF_COUNT_BRANCH_MISSES	=  5,

	/*
	 * Special "software" counters provided by the kernel, even if
	 * the hardware does not support performance counters. These
	 * counters measure various physical and sw events of the
	 * kernel (and allow the profiling of them as well):
	 */
	PERF_COUNT_CPU_CLOCK		= -1,
	PERF_COUNT_TASK_CLOCK		= -2,
	PERF_COUNT_PAGE_FAULTS		= -3,
	PERF_COUNT_CONTEXT_SWITCHES	= -4,
};
};


/*
/*
 * IRQ-notification data record type:
 * IRQ-notification data record type:
 */
 */
enum perf_record_type {
enum perf_counter_record_type {
	PERF_RECORD_SIMPLE,
	PERF_RECORD_SIMPLE		=  0,
	PERF_RECORD_IRQ,
	PERF_RECORD_IRQ			=  1,
	PERF_RECORD_GROUP,
	PERF_RECORD_GROUP		=  2,
};
};


struct perf_counter_event {
/*
	u32			hw_event_type;
 * Hardware event to monitor via a performance monitoring counter:
	u32			hw_event_period;
 */
	u64			hw_raw_ctrl;
struct perf_counter_hw_event {
	u64			type;

	u64			irq_period;
	u32			record_type;

	u32			disabled     :  1, /* off by default */
				nmi	     :  1, /* NMI sampling   */
				raw	     :  1, /* raw event type */
				__reserved_1 : 29;

	u64			__reserved_2;
};
};


/*
 * Kernel-internal data types:
 */

/**
/**
 * struct hw_perf_counter - performance counter hardware details
 * struct hw_perf_counter - performance counter hardware details:
 */
 */
struct hw_perf_counter {
struct hw_perf_counter {
	u64				config;
	u64				config;
@@ -66,8 +94,8 @@ struct hw_perf_counter {
	int				nmi;
	int				nmi;
	unsigned int			idx;
	unsigned int			idx;
	u64				prev_count;
	u64				prev_count;
	s32			next_count;
	u64				irq_period;
	u64				irq_period;
	s32				next_count;
};
};


/*
/*
@@ -96,7 +124,7 @@ struct perf_counter {
#else
#else
	atomic_t			count32[2];
	atomic_t			count32[2];
#endif
#endif
	struct perf_counter_event	event;
	struct perf_counter_hw_event	hw_event;
	struct hw_perf_counter		hw;
	struct hw_perf_counter		hw;


	struct perf_counter_context	*ctx;
	struct perf_counter_context	*ctx;
@@ -110,8 +138,6 @@ struct perf_counter {
	int				oncpu;
	int				oncpu;
	int				cpu;
	int				cpu;


	enum perf_record_type		record_type;

	/* read() / irq related data */
	/* read() / irq related data */
	wait_queue_head_t		waitq;
	wait_queue_head_t		waitq;
	/* optional: for NMIs */
	/* optional: for NMIs */
+8 −4
Original line number Original line Diff line number Diff line
@@ -54,7 +54,7 @@ struct compat_stat;
struct compat_timeval;
struct compat_timeval;
struct robust_list_head;
struct robust_list_head;
struct getcpu_cache;
struct getcpu_cache;
struct perf_counter_event;
struct perf_counter_hw_event;


#include <linux/types.h>
#include <linux/types.h>
#include <linux/aio_abi.h>
#include <linux/aio_abi.h>
@@ -625,7 +625,11 @@ asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len);


int kernel_execve(const char *filename, char *const argv[], char *const envp[]);
int kernel_execve(const char *filename, char *const argv[], char *const envp[]);


asmlinkage int

sys_perf_counter_open(struct perf_counter_event __user *uevent, u32 record_type,
asmlinkage int sys_perf_counter_open(
		      pid_t pid, int cpu, int masterfd);

	struct perf_counter_hw_event	*hw_event_uptr		__user,
	pid_t				pid,
	int				cpu,
	int				group_fd);
#endif
#endif
+22 −16
Original line number Original line Diff line number Diff line
@@ -669,7 +669,7 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
{
{
	struct perf_counter *counter = file->private_data;
	struct perf_counter *counter = file->private_data;


	switch (counter->record_type) {
	switch (counter->hw_event.record_type) {
	case PERF_RECORD_SIMPLE:
	case PERF_RECORD_SIMPLE:
		return perf_read_hw(counter, buf, count);
		return perf_read_hw(counter, buf, count);


@@ -707,7 +707,7 @@ static const struct file_operations perf_fops = {
 * Allocate and initialize a counter structure
 * Allocate and initialize a counter structure
 */
 */
static struct perf_counter *
static struct perf_counter *
perf_counter_alloc(struct perf_counter_event *event, int cpu, u32 record_type)
perf_counter_alloc(struct perf_counter_hw_event *hw_event, int cpu)
{
{
	struct perf_counter *counter = kzalloc(sizeof(*counter), GFP_KERNEL);
	struct perf_counter *counter = kzalloc(sizeof(*counter), GFP_KERNEL);


@@ -721,28 +721,34 @@ perf_counter_alloc(struct perf_counter_event *event, int cpu, u32 record_type)
	counter->irqdata		= &counter->data[0];
	counter->irqdata		= &counter->data[0];
	counter->usrdata		= &counter->data[1];
	counter->usrdata		= &counter->data[1];
	counter->cpu			= cpu;
	counter->cpu			= cpu;
	counter->record_type	= record_type;
	counter->hw_event		= *hw_event;
	counter->event		= *event;
	counter->wakeup_pending		= 0;
	counter->wakeup_pending		= 0;


	return counter;
	return counter;
}
}


/**
/**
 * sys_perf_task_open - open a performance counter associate it to a task
 * sys_perf_counter_open - open a performance counter, associate it to a task/cpu
 * @hw_event_type:	event type for monitoring/sampling...
 *
 * @hw_event_uptr:	event type attributes for monitoring/sampling
 * @pid:		target pid
 * @pid:		target pid
 * @cpu:		target cpu
 * @group_fd:		group leader counter fd
 */
 */
asmlinkage int
asmlinkage int sys_perf_counter_open(
sys_perf_counter_open(struct perf_counter_event __user *uevent, u32 record_type,

		      pid_t pid, int cpu, int masterfd)
	struct perf_counter_hw_event	*hw_event_uptr		__user,
	pid_t				pid,
	int				cpu,
	int				group_fd)

{
{
	struct perf_counter_context *ctx;
	struct perf_counter_context *ctx;
	struct perf_counter_event event;
	struct perf_counter_hw_event hw_event;
	struct perf_counter *counter;
	struct perf_counter *counter;
	int ret;
	int ret;


	if (copy_from_user(&event, uevent, sizeof(event)) != 0)
	if (copy_from_user(&hw_event, hw_event_uptr, sizeof(hw_event)) != 0)
		return -EFAULT;
		return -EFAULT;


	ctx = find_get_context(pid, cpu);
	ctx = find_get_context(pid, cpu);
@@ -750,7 +756,7 @@ sys_perf_counter_open(struct perf_counter_event __user *uevent, u32 record_type,
		return PTR_ERR(ctx);
		return PTR_ERR(ctx);


	ret = -ENOMEM;
	ret = -ENOMEM;
	counter = perf_counter_alloc(&event, cpu, record_type);
	counter = perf_counter_alloc(&hw_event, cpu);
	if (!counter)
	if (!counter)
		goto err_put_context;
		goto err_put_context;