Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit f0453c73 authored by Raghavendra Rao Ananta
Browse files

perf: Add support for user and kernel event sharing



The ARM PMU has a limited number of counters. Even when counting similar
events, the PMU driver allocates a new counter for each request; hence,
counters configured to count the same event are shared instead. Previously
this sharing was possible only for kernel clients, not for user-space
clients. This change extends sharing so that the kernel and user space can
share similar events. Counters can be shared among user-space-only clients,
kernel-only clients, or between user-space and kernel clients. For two
clients to share a counter, their attr->type (hardware/raw) and
attr->config must be the same.

Change-Id: I4a4b35bde6beaf8f2aef74e683a9804e31807013
Signed-off-by: Raghavendra Rao Ananta <rananta@codeaurora.org>
parent ef790030
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -716,6 +716,9 @@ struct perf_event {
	 * CPU wakes up and will be removed from the list after that
	 */
	struct list_head		dormant_event_entry;

	/* Is this event shared with other events */
	bool					shared;
#endif /* CONFIG_PERF_EVENTS */
};

+10 −0
Original line number Diff line number Diff line
@@ -1522,6 +1522,16 @@ config PERF_EVENTS

	  Say Y if unsure.

config PERF_USER_SHARE
	bool "Perf event sharing with user-space"
	help
	  Say Y here to enable user-space sharing of events. Events can be
	  shared among other user-space events or with kernel-created events
	  that have the same config and type attributes.

	  Say N if unsure.


config DEBUG_PERF_USE_VMALLOC
	default n
	bool "Debug: use vmalloc to back perf mmap() buffers"
+207 −129
Original line number Diff line number Diff line
@@ -1770,6 +1770,10 @@ static void perf_group_detach(struct perf_event *event)
	if (event->group_leader != event) {
		list_del_init(&event->group_entry);
		event->group_leader->nr_siblings--;

		if (event->shared)
			event->group_leader = event;

		goto out;
	}

@@ -4462,15 +4466,23 @@ int perf_event_release_kernel(struct perf_event *event)

	if (!is_kernel_event(event)) {
		perf_remove_from_owner(event);
	} else {
		if (perf_event_delete_kernel_shared(event) > 0)
			return 0;
	}

	ctx = perf_event_ctx_lock(event);
	WARN_ON_ONCE(ctx->parent_ctx);
	perf_remove_from_context(event, DETACH_GROUP);

	if (perf_event_delete_kernel_shared(event) > 0) {
		perf_event__state_init(event);
		perf_install_in_context(ctx, event, event->cpu);

		perf_event_ctx_unlock(event, ctx);

		perf_event_enable(event);

		return 0;
	}

	raw_spin_lock_irq(&ctx->lock);
	/*
	 * Mark this event as STATE_DEAD, there is no external reference to it
@@ -9614,6 +9626,122 @@ static void account_event(struct perf_event *event)
	account_pmu_sb_event(event);
}

/*
 * perf_event_create_kernel_shared_check - look for an existing event to share.
 *
 * Scan the per-cpu shared-events list for an event whose attr->type and
 * attr->config match @attr.  On a match, mark that event as shared, take a
 * reference on its slot and return it; return NULL when no shareable event
 * exists (the caller then falls back to allocating a fresh event).
 *
 * Only per-cpu counting events qualify for sharing: requests with a task
 * context, an overflow handler, a sample period, or a type other than
 * HARDWARE/RAW are rejected up front.
 */
static struct perf_event *
perf_event_create_kernel_shared_check(struct perf_event_attr *attr, int cpu,
		struct task_struct *task,
		perf_overflow_handler_t overflow_handler,
		struct perf_event *group_leader)
{
	unsigned long idx;
	struct perf_event *event;
	struct shared_events_str *shrd_events;

	/*
	 * Have to be per cpu events for sharing
	 */
	if (!shared_events || (u32)cpu >= nr_cpu_ids)
		return NULL;

	/*
	 * Can't handle these type requests for sharing right now.
	 */
	if (task || overflow_handler || attr->sample_period ||
	    (attr->type != PERF_TYPE_HARDWARE &&
	     attr->type != PERF_TYPE_RAW)) {
		return NULL;
	}

	/*
	 * Using per_cpu_ptr (or could do cross cpu call which is what most of
	 * perf does to access per cpu data structures
	 */
	shrd_events = per_cpu_ptr(shared_events, cpu);

	/* list_mutex serializes this lookup against concurrent add/delete */
	mutex_lock(&shrd_events->list_mutex);

	event = NULL;
	for_each_set_bit(idx, shrd_events->used_mask, SHARED_EVENTS_MAX) {
		/* Do the comparisons field by field on the attr structure.
		 * This is because the user-space and kernel-space might
		 * be using different versions of perf. As a result,
		 * the fields' position in the memory and the size might not
		 * be the same. Hence memcmp() is not the best way to
		 * compare.
		 */
		if (attr->type == shrd_events->attr[idx].type &&
			attr->config == shrd_events->attr[idx].config) {

			event = shrd_events->events[idx];

			/* Do not change the group for this shared event */
			if (group_leader && event->group_leader != event) {
				event = NULL;
				continue;
			}

			/* Matched: flag as shared and take a slot reference */
			event->shared = true;
			atomic_inc(&shrd_events->refcount[idx]);
			break;
		}
	}
	mutex_unlock(&shrd_events->list_mutex);

	return event;
}

/*
 * perf_event_create_kernel_shared_add - publish a new event for sharing.
 *
 * Record @event (and a copy of @attr) in the first free slot of the per-cpu
 * shared-events list so that later requests with a matching type/config can
 * reuse it.  The slot's refcount starts at 1 for this initial owner.
 * Silently does nothing when sharing is unavailable, the request is not a
 * shareable kind, or the list is full.
 *
 * NOTE(review): unlike perf_event_create_kernel_shared_check(), @task is not
 * rejected here — confirm whether task-context events are really meant to be
 * added to the shared list.
 */
static void
perf_event_create_kernel_shared_add(struct perf_event_attr *attr, int cpu,
				 struct task_struct *task,
				 perf_overflow_handler_t overflow_handler,
				 void *context,
				 struct perf_event *event)
{
	unsigned long idx;
	struct shared_events_str *shrd_events;

	/*
	 * Have to be per cpu events for sharing
	 */
	if (!shared_events || (u32)cpu >= nr_cpu_ids)
		return;

	/*
	 * Can't handle these type requests for sharing right now.
	 */
	if (overflow_handler || attr->sample_period ||
	    (attr->type != PERF_TYPE_HARDWARE &&
	     attr->type != PERF_TYPE_RAW)) {
		return;
	}

	/*
	 * Using per_cpu_ptr (or could do cross cpu call which is what most of
	 * perf does to access per cpu data structures
	 */
	shrd_events = per_cpu_ptr(shared_events, cpu);

	mutex_lock(&shrd_events->list_mutex);

	/*
	 * If we are in this routine, we know that this event isn't already in
	 * the shared list. Check if slot available in shared list
	 */
	idx = find_first_zero_bit(shrd_events->used_mask, SHARED_EVENTS_MAX);

	if (idx >= SHARED_EVENTS_MAX)
		goto out;

	/*
	 * The event isn't in the list and there is an empty slot so add it.
	 */
	shrd_events->attr[idx]   = *attr;
	shrd_events->events[idx] = event;
	set_bit(idx, shrd_events->used_mask);
	atomic_set(&shrd_events->refcount[idx], 1);
out:
	mutex_unlock(&shrd_events->list_mutex);
}

/*
 * Allocate and initialize a event structure
 */
@@ -10084,6 +10212,31 @@ __perf_event_ctx_lock_double(struct perf_event *group_leader,
	return gctx;
}

#ifdef CONFIG_PERF_USER_SHARE
/*
 * perf_group_shared_event - attach a reused shared event to a new group.
 *
 * When perf_event_open() hands back an already-existing shared event and the
 * caller asked for a group, detach the event from its current context and
 * re-install it under @group_leader's context with @group_leader as its
 * leader.  Events that already belong to a group (leader != self) are left
 * untouched so an established grouping is never broken.
 */
static void perf_group_shared_event(struct perf_event *event,
		struct perf_event *group_leader)
{
	if (!event->shared || !group_leader)
		return;

	/* Do not attempt to change the group for this shared event */
	if (event->group_leader != event)
		return;

	/*
	 * Single events have the group leaders as themselves.
	 * As we now have a new group to attach to, remove from
	 * the previous group and attach it to the new group.
	 */
	perf_remove_from_context(event, DETACH_GROUP);

	/* Re-init state before installing in the leader's context */
	event->group_leader	= group_leader;
	perf_event__state_init(event);

	perf_install_in_context(group_leader->ctx, event, event->cpu);
}
#endif

/**
 * sys_perf_event_open - open a performance event, associate it to a task/cpu
 *
@@ -10097,7 +10250,7 @@ SYSCALL_DEFINE5(perf_event_open,
		pid_t, pid, int, cpu, int, group_fd, unsigned long, flags)
{
	struct perf_event *group_leader = NULL, *output_event = NULL;
	struct perf_event *event, *sibling;
	struct perf_event *event = NULL, *sibling;
	struct perf_event_attr attr;
	struct perf_event_context *ctx, *uninitialized_var(gctx);
	struct file *event_file = NULL;
@@ -10209,12 +10362,18 @@ SYSCALL_DEFINE5(perf_event_open,
	if (flags & PERF_FLAG_PID_CGROUP)
		cgroup_fd = pid;

#ifdef CONFIG_PERF_USER_SHARE
	event = perf_event_create_kernel_shared_check(&attr, cpu, task, NULL,
			group_leader);
#endif
	if (!event) {
		event = perf_event_alloc(&attr, cpu, task, group_leader, NULL,
					 NULL, NULL, cgroup_fd);
		if (IS_ERR(event)) {
			err = PTR_ERR(event);
			goto err_cred;
		}
	}

	if (is_sampling_event(event)) {
		if (event->pmu->capabilities & PERF_PMU_CAP_NO_INTERRUPT) {
@@ -10380,7 +10539,7 @@ SYSCALL_DEFINE5(perf_event_open,
	 * Must be under the same ctx::mutex as perf_install_in_context(),
	 * because we need to serialize with concurrent event creation.
	 */
	if (!exclusive_event_installable(event, ctx)) {
	if (!event->shared && !exclusive_event_installable(event, ctx)) {
		/* exclusive and group stuff are assumed mutually exclusive */
		WARN_ON_ONCE(move_group);

@@ -10451,10 +10610,17 @@ SYSCALL_DEFINE5(perf_event_open,
	perf_event__header_size(event);
	perf_event__id_header_size(event);

#ifdef CONFIG_PERF_USER_SHARE
	if (event->shared && group_leader)
		perf_group_shared_event(event, group_leader);
#endif

	if (!event->shared) {
		event->owner = current;

		perf_install_in_context(ctx, event, event->cpu);
		perf_unpin_context(ctx);
	}

	if (move_group)
		perf_event_ctx_unlock(group_leader, gctx);
@@ -10465,9 +10631,11 @@ SYSCALL_DEFINE5(perf_event_open,
		put_task_struct(task);
	}

	if (!event->shared) {
		mutex_lock(&current->perf_event_mutex);
		list_add_tail(&event->owner_entry, &current->perf_event_list);
		mutex_unlock(&current->perf_event_mutex);
	}

	/*
	 * Drop the reference on the group_event after placing the
@@ -10477,6 +10645,14 @@ SYSCALL_DEFINE5(perf_event_open,
	 */
	fdput(group);
	fd_install(event_fd, event_file);

#ifdef CONFIG_PERF_USER_SHARE
	/* Add the event to the shared events list */
	if (!event->shared)
		perf_event_create_kernel_shared_add(&attr, cpu,
				 task, NULL, ctx, event);
#endif

	return event_fd;

err_locked:
@@ -10508,102 +10684,6 @@ SYSCALL_DEFINE5(perf_event_open,
	return err;
}

/*
 * perf_event_create_kernel_shared_check - look for an existing event to share.
 *
 * Kernel-only sharing path: scan the per-cpu shared-events list for an event
 * whose attr matches @attr byte-for-byte (whole-structure memcmp).  On a
 * match, take a reference on the slot and return the event; return NULL when
 * nothing matches or the request is not shareable (task, context, or
 * overflow handler set, or type other than HARDWARE/RAW).
 */
static struct perf_event *
perf_event_create_kernel_shared_check(struct perf_event_attr *attr, int cpu,
				 struct task_struct *task,
				 perf_overflow_handler_t overflow_handler,
				 void *context)
{
	unsigned long idx;
	struct perf_event *event;
	struct shared_events_str *shrd_events;

	/*
	 * Have to be per cpu events for sharing
	 */
	if (!shared_events || (u32)cpu >= nr_cpu_ids)
		return NULL;

	/*
	 * Can't handle these type requests for sharing right now.
	 */
	if (task || context || overflow_handler ||
	    (attr->type != PERF_TYPE_HARDWARE &&
	     attr->type != PERF_TYPE_RAW))
		return NULL;

	/*
	 * Using per_cpu_ptr (or could do cross cpu call which is what most of
	 * perf does to access per cpu data structures
	 */
	shrd_events = per_cpu_ptr(shared_events, cpu);

	/* list_mutex serializes this lookup against concurrent add/delete */
	mutex_lock(&shrd_events->list_mutex);

	event = NULL;
	for_each_set_bit(idx, shrd_events->used_mask, SHARED_EVENTS_MAX) {
		/* Exact attr match: take a slot reference and reuse event */
		if (memcmp(attr, &shrd_events->attr[idx],
		    sizeof(shrd_events->attr[idx])) == 0) {
			atomic_inc(&shrd_events->refcount[idx]);
			event = shrd_events->events[idx];
			break;
		}
	}
	mutex_unlock(&shrd_events->list_mutex);
	return event;
}

/*
 * perf_event_create_kernel_shared_add - publish a new event for sharing.
 *
 * Kernel-only sharing path: record @event (and a copy of @attr) in the first
 * free slot of the per-cpu shared-events list so that later kernel requests
 * with an identical attr can reuse it.  The slot's refcount starts at 1 for
 * this initial owner.  Silently does nothing when sharing is unavailable,
 * the request is not a shareable kind (task, context, or overflow handler
 * set, or type other than HARDWARE/RAW), or the list is full.
 */
static void
perf_event_create_kernel_shared_add(struct perf_event_attr *attr, int cpu,
				 struct task_struct *task,
				 perf_overflow_handler_t overflow_handler,
				 void *context,
				 struct perf_event *event)
{
	unsigned long idx;
	struct shared_events_str *shrd_events;

	/*
	 * Have to be per cpu events for sharing
	 */
	if (!shared_events || (u32)cpu >= nr_cpu_ids)
		return;

	/*
	 * Can't handle these type requests for sharing right now.
	 */
	if (task || context || overflow_handler ||
	    (attr->type != PERF_TYPE_HARDWARE &&
	     attr->type != PERF_TYPE_RAW))
		return;

	/*
	 * Using per_cpu_ptr (or could do cross cpu call which is what most of
	 * perf does to access per cpu data structures
	 */
	shrd_events = per_cpu_ptr(shared_events, cpu);

	mutex_lock(&shrd_events->list_mutex);

	/*
	 * If we are in this routine, we know that this event isn't already in
	 * the shared list. Check if slot available in shared list
	 */
	idx = find_first_zero_bit(shrd_events->used_mask, SHARED_EVENTS_MAX);

	if (idx >= SHARED_EVENTS_MAX)
		goto out;

	/*
	 * The event isn't in the list and there is an empty slot so add it.
	 */
	shrd_events->attr[idx]   = *attr;
	shrd_events->events[idx] = event;
	set_bit(idx, shrd_events->used_mask);
	atomic_set(&shrd_events->refcount[idx], 1);
out:
	mutex_unlock(&shrd_events->list_mutex);
}

/**
 * perf_event_create_kernel_counter
@@ -10622,28 +10702,26 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
	struct perf_event *event;
	int err;

	/*
	 * Check if the requested attributes match a shared event
	 */
	event = perf_event_create_kernel_shared_check(attr, cpu,
				 task, overflow_handler, context);
	if (event)
		return event;

	/*
	 * Get the target context (task or percpu):
	 */

	event = perf_event_create_kernel_shared_check(attr, cpu, task,
						overflow_handler, NULL);
	if (!event) {
		event = perf_event_alloc(attr, cpu, task, NULL, NULL,
				overflow_handler, context, -1);
		if (IS_ERR(event)) {
			err = PTR_ERR(event);
			goto err;
		}
	}

	/* Mark owner so we could distinguish it from user events. */
	event->owner = TASK_TOMBSTONE;

	if (event->shared)
		return event;

	/*
	 * Get the target context (task or percpu):
	 */
	ctx = find_get_context(event->pmu, task, event);
	if (IS_ERR(ctx)) {
		err = PTR_ERR(ctx);