Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit f0453c73 authored by Raghavendra Rao Ananta
Browse files

perf: Add support for user and kernel event sharing



The ARM PMU has a limited number of counters. Even when counting similar
events, the PMU driver allocates a new counter for each request; hence,
counters configured to count the same event are shared instead. Previously
this sharing was possible only for kernel clients, not for user-space
clients. This change extends sharing so that the kernel and user space can
share similar events. Counters can be shared among user-space-only clients,
kernel-only clients, or between user-space and kernel clients. For two
clients to share a counter, their attr->type (hardware/raw) and
attr->config must be the same.

Change-Id: I4a4b35bde6beaf8f2aef74e683a9804e31807013
Signed-off-by: Raghavendra Rao Ananta <rananta@codeaurora.org>
parent ef790030
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -716,6 +716,9 @@ struct perf_event {
	 * CPU wakes up and will be removed from the list after that
	 */
	struct list_head		dormant_event_entry;

	/* Is this event shared with other events */
	bool					shared;
#endif /* CONFIG_PERF_EVENTS */
};

+10 −0
Original line number Diff line number Diff line
@@ -1522,6 +1522,16 @@ config PERF_EVENTS

	  Say Y if unsure.

config PERF_USER_SHARE
	bool "Perf event sharing with user-space"
	help
	  Say Y here to enable user-space sharing of events. Events can be
	  shared among other user-space events or with kernel-created events
	  that have the same config and type attributes.

	  Say N if unsure.


config DEBUG_PERF_USE_VMALLOC
	default n
	bool "Debug: use vmalloc to back perf mmap() buffers"
+207 −129
Original line number Diff line number Diff line
@@ -1770,6 +1770,10 @@ static void perf_group_detach(struct perf_event *event)
	if (event->group_leader != event) {
		list_del_init(&event->group_entry);
		event->group_leader->nr_siblings--;

		if (event->shared)
			event->group_leader = event;

		goto out;
	}

@@ -4462,15 +4466,23 @@ int perf_event_release_kernel(struct perf_event *event)

	if (!is_kernel_event(event)) {
		perf_remove_from_owner(event);
	} else {
		if (perf_event_delete_kernel_shared(event) > 0)
			return 0;
	}

	ctx = perf_event_ctx_lock(event);
	WARN_ON_ONCE(ctx->parent_ctx);
	perf_remove_from_context(event, DETACH_GROUP);

	if (perf_event_delete_kernel_shared(event) > 0) {
		perf_event__state_init(event);
		perf_install_in_context(ctx, event, event->cpu);

		perf_event_ctx_unlock(event, ctx);

		perf_event_enable(event);

		return 0;
	}

	raw_spin_lock_irq(&ctx->lock);
	/*
	 * Mark this event as STATE_DEAD, there is no external reference to it
@@ -9614,6 +9626,122 @@ static void account_event(struct perf_event *event)
	account_pmu_sb_event(event);
}

/*
 * perf_event_create_kernel_shared_check - look for an existing event to share.
 *
 * Scan the per-cpu shared-events list for an event whose attr->type and
 * attr->config match @attr.  On a match, mark that event as shared, take a
 * reference on its slot and return it; return NULL when no shareable event
 * exists (the caller then falls back to allocating a fresh event).
 *
 * Only per-cpu counting events qualify for sharing: requests with a task
 * context, an overflow handler, a sample period, or a type other than
 * HARDWARE/RAW are rejected up front.
 */
static struct perf_event *
perf_event_create_kernel_shared_check(struct perf_event_attr *attr, int cpu,
		struct task_struct *task,
		perf_overflow_handler_t overflow_handler,
		struct perf_event *group_leader)
{
	unsigned long idx;
	struct perf_event *event;
	struct shared_events_str *shrd_events;

	/*
	 * Have to be per cpu events for sharing
	 */
	if (!shared_events || (u32)cpu >= nr_cpu_ids)
		return NULL;

	/*
	 * Can't handle these type requests for sharing right now.
	 */
	if (task || overflow_handler || attr->sample_period ||
	    (attr->type != PERF_TYPE_HARDWARE &&
	     attr->type != PERF_TYPE_RAW)) {
		return NULL;
	}

	/*
	 * Using per_cpu_ptr (or could do cross cpu call which is what most of
	 * perf does to access per cpu data structures
	 */
	shrd_events = per_cpu_ptr(shared_events, cpu);

	/* list_mutex serializes this lookup against concurrent add/delete */
	mutex_lock(&shrd_events->list_mutex);

	event = NULL;
	for_each_set_bit(idx, shrd_events->used_mask, SHARED_EVENTS_MAX) {
		/* Do the comparisons field by field on the attr structure.
		 * This is because the user-space and kernel-space might
		 * be using different versions of perf. As a result,
		 * the fields' position in the memory and the size might not
		 * be the same. Hence memcmp() is not the best way to
		 * compare.
		 */
		if (attr->type == shrd_events->attr[idx].type &&
			attr->config == shrd_events->attr[idx].config) {

			event = shrd_events->events[idx];

			/* Do not change the group for this shared event */
			if (group_leader && event->group_leader != event) {
				event = NULL;
				continue;
			}

			/* Matched: flag as shared and take a slot reference */
			event->shared = true;
			atomic_inc(&shrd_events->refcount[idx]);
			break;
		}
	}
	mutex_unlock(&shrd_events->list_mutex);

	return event;
}

/*
 * perf_event_create_kernel_shared_add - publish a new event for sharing.
 *
 * Record @event (and a copy of @attr) in the first free slot of the per-cpu
 * shared-events list so that later requests with a matching type/config can
 * reuse it.  The slot's refcount starts at 1 for this initial owner.
 * Silently does nothing when sharing is unavailable, the request is not a
 * shareable kind, or the list is full.
 *
 * NOTE(review): unlike perf_event_create_kernel_shared_check(), @task is not
 * rejected here — confirm whether task-context events are really meant to be
 * added to the shared list.
 */
static void
perf_event_create_kernel_shared_add(struct perf_event_attr *attr, int cpu,
				 struct task_struct *task,
				 perf_overflow_handler_t overflow_handler,
				 void *context,
				 struct perf_event *event)
{
	unsigned long idx;
	struct shared_events_str *shrd_events;

	/*
	 * Have to be per cpu events for sharing
	 */
	if (!shared_events || (u32)cpu >= nr_cpu_ids)
		return;

	/*
	 * Can't handle these type requests for sharing right now.
	 */
	if (overflow_handler || attr->sample_period ||
	    (attr->type != PERF_TYPE_HARDWARE &&
	     attr->type != PERF_TYPE_RAW)) {
		return;
	}

	/*
	 * Using per_cpu_ptr (or could do cross cpu call which is what most of
	 * perf does to access per cpu data structures
	 */
	shrd_events = per_cpu_ptr(shared_events, cpu);

	mutex_lock(&shrd_events->list_mutex);

	/*
	 * If we are in this routine, we know that this event isn't already in
	 * the shared list. Check if slot available in shared list
	 */
	idx = find_first_zero_bit(shrd_events->used_mask, SHARED_EVENTS_MAX);

	if (idx >= SHARED_EVENTS_MAX)
		goto out;

	/*
	 * The event isn't in the list and there is an empty slot so add it.
	 */
	shrd_events->attr[idx]   = *attr;
	shrd_events->events[idx] = event;
	set_bit(idx, shrd_events->used_mask);
	atomic_set(&shrd_events->refcount[idx], 1);
out:
	mutex_unlock(&shrd_events->list_mutex);
}

/*
 * Allocate and initialize a event structure
 */
@@ -10084,6 +10212,31 @@ __perf_event_ctx_lock_double(struct perf_event *group_leader,
	return gctx;
}

#ifdef CONFIG_PERF_USER_SHARE
/*
 * perf_group_shared_event - attach a reused shared event to a new group.
 *
 * When perf_event_open() hands back an already-existing shared event and the
 * caller asked for a group, detach the event from its current context and
 * re-install it under @group_leader's context with @group_leader as its
 * leader.  Events that already belong to a group (leader != self) are left
 * untouched so an established grouping is never broken.
 */
static void perf_group_shared_event(struct perf_event *event,
		struct perf_event *group_leader)
{
	if (!event->shared || !group_leader)
		return;

	/* Do not attempt to change the group for this shared event */
	if (event->group_leader != event)
		return;

	/*
	 * Single events have the group leaders as themselves.
	 * As we now have a new group to attach to, remove from
	 * the previous group and attach it to the new group.
	 */
	perf_remove_from_context(event, DETACH_GROUP);

	/* Re-init state before installing in the leader's context */
	event->group_leader	= group_leader;
	perf_event__state_init(event);

	perf_install_in_context(group_leader->ctx, event, event->cpu);
}
#endif

/**
 * sys_perf_event_open - open a performance event, associate it to a task/cpu
 *
@@ -10097,7 +10250,7 @@ SYSCALL_DEFINE5(perf_event_open,
		pid_t, pid, int, cpu, int, group_fd, unsigned long, flags)
{
	struct perf_event *group_leader = NULL, *output_event = NULL;
	struct perf_event *event, *sibling;
	struct perf_event *event = NULL, *sibling;
	struct perf_event_attr attr;
	struct perf_event_context *ctx, *uninitialized_var(gctx);
	struct file *event_file = NULL;
@@ -10209,12 +10362,18 @@ SYSCALL_DEFINE5(perf_event_open,
	if (flags & PERF_FLAG_PID_CGROUP)
		cgroup_fd = pid;

#ifdef CONFIG_PERF_USER_SHARE
	event = perf_event_create_kernel_shared_check(&attr, cpu, task, NULL,
			group_leader);
#endif
	if (!event) {
		event = perf_event_alloc(&attr, cpu, task, group_leader, NULL,
					 NULL, NULL, cgroup_fd);
		if (IS_ERR(event)) {
			err = PTR_ERR(event);
			goto err_cred;
		}
	}

	if (is_sampling_event(event)) {
		if (event->pmu->capabilities & PERF_PMU_CAP_NO_INTERRUPT) {
@@ -10380,7 +10539,7 @@ SYSCALL_DEFINE5(perf_event_open,
	 * Must be under the same ctx::mutex as perf_install_in_context(),
	 * because we need to serialize with concurrent event creation.
	 */
	if (!exclusive_event_installable(event, ctx)) {
	if (!event->shared && !exclusive_event_installable(event, ctx)) {
		/* exclusive and group stuff are assumed mutually exclusive */
		WARN_ON_ONCE(move_group);

@@ -10451,10 +10610,17 @@ SYSCALL_DEFINE5(perf_event_open,
	perf_event__header_size(event);
	perf_event__id_header_size(event);

#ifdef CONFIG_PERF_USER_SHARE
	if (event->shared && group_leader)
		perf_group_shared_event(event, group_leader);
#endif

	if (!event->shared) {
		event->owner = current;

		perf_install_in_context(ctx, event, event->cpu);
		perf_unpin_context(ctx);
	}

	if (move_group)
		perf_event_ctx_unlock(group_leader, gctx);
@@ -10465,9 +10631,11 @@ SYSCALL_DEFINE5(perf_event_open,
		put_task_struct(task);
	}

	if (!event->shared) {
		mutex_lock(&current->perf_event_mutex);
		list_add_tail(&event->owner_entry, &current->perf_event_list);
		mutex_unlock(&current->perf_event_mutex);
	}

	/*
	 * Drop the reference on the group_event after placing the
@@ -10477,6 +10645,14 @@ SYSCALL_DEFINE5(perf_event_open,
	 */
	fdput(group);
	fd_install(event_fd, event_file);

#ifdef CONFIG_PERF_USER_SHARE
	/* Add the event to the shared events list */
	if (!event->shared)
		perf_event_create_kernel_shared_add(&attr, cpu,
				 task, NULL, ctx, event);
#endif

	return event_fd;

err_locked:
@@ -10508,102 +10684,6 @@ SYSCALL_DEFINE5(perf_event_open,
	return err;
}

/*
 * perf_event_create_kernel_shared_check - look for an existing event to share.
 *
 * Kernel-only sharing path: scan the per-cpu shared-events list for an event
 * whose attr matches @attr byte-for-byte (whole-structure memcmp).  On a
 * match, take a reference on the slot and return the event; return NULL when
 * nothing matches or the request is not shareable (task, context, or
 * overflow handler set, or type other than HARDWARE/RAW).
 */
static struct perf_event *
perf_event_create_kernel_shared_check(struct perf_event_attr *attr, int cpu,
				 struct task_struct *task,
				 perf_overflow_handler_t overflow_handler,
				 void *context)
{
	unsigned long idx;
	struct perf_event *event;
	struct shared_events_str *shrd_events;

	/*
	 * Have to be per cpu events for sharing
	 */
	if (!shared_events || (u32)cpu >= nr_cpu_ids)
		return NULL;

	/*
	 * Can't handle these type requests for sharing right now.
	 */
	if (task || context || overflow_handler ||
	    (attr->type != PERF_TYPE_HARDWARE &&
	     attr->type != PERF_TYPE_RAW))
		return NULL;

	/*
	 * Using per_cpu_ptr (or could do cross cpu call which is what most of
	 * perf does to access per cpu data structures
	 */
	shrd_events = per_cpu_ptr(shared_events, cpu);

	/* list_mutex serializes this lookup against concurrent add/delete */
	mutex_lock(&shrd_events->list_mutex);

	event = NULL;
	for_each_set_bit(idx, shrd_events->used_mask, SHARED_EVENTS_MAX) {
		/* Exact attr match: take a slot reference and reuse event */
		if (memcmp(attr, &shrd_events->attr[idx],
		    sizeof(shrd_events->attr[idx])) == 0) {
			atomic_inc(&shrd_events->refcount[idx]);
			event = shrd_events->events[idx];
			break;
		}
	}
	mutex_unlock(&shrd_events->list_mutex);
	return event;
}

/*
 * perf_event_create_kernel_shared_add - publish a new event for sharing.
 *
 * Kernel-only sharing path: record @event (and a copy of @attr) in the first
 * free slot of the per-cpu shared-events list so that later kernel requests
 * with an identical attr can reuse it.  The slot's refcount starts at 1 for
 * this initial owner.  Silently does nothing when sharing is unavailable,
 * the request is not a shareable kind (task, context, or overflow handler
 * set, or type other than HARDWARE/RAW), or the list is full.
 */
static void
perf_event_create_kernel_shared_add(struct perf_event_attr *attr, int cpu,
				 struct task_struct *task,
				 perf_overflow_handler_t overflow_handler,
				 void *context,
				 struct perf_event *event)
{
	unsigned long idx;
	struct shared_events_str *shrd_events;

	/*
	 * Have to be per cpu events for sharing
	 */
	if (!shared_events || (u32)cpu >= nr_cpu_ids)
		return;

	/*
	 * Can't handle these type requests for sharing right now.
	 */
	if (task || context || overflow_handler ||
	    (attr->type != PERF_TYPE_HARDWARE &&
	     attr->type != PERF_TYPE_RAW))
		return;

	/*
	 * Using per_cpu_ptr (or could do cross cpu call which is what most of
	 * perf does to access per cpu data structures
	 */
	shrd_events = per_cpu_ptr(shared_events, cpu);

	mutex_lock(&shrd_events->list_mutex);

	/*
	 * If we are in this routine, we know that this event isn't already in
	 * the shared list. Check if slot available in shared list
	 */
	idx = find_first_zero_bit(shrd_events->used_mask, SHARED_EVENTS_MAX);

	if (idx >= SHARED_EVENTS_MAX)
		goto out;

	/*
	 * The event isn't in the list and there is an empty slot so add it.
	 */
	shrd_events->attr[idx]   = *attr;
	shrd_events->events[idx] = event;
	set_bit(idx, shrd_events->used_mask);
	atomic_set(&shrd_events->refcount[idx], 1);
out:
	mutex_unlock(&shrd_events->list_mutex);
}

/**
 * perf_event_create_kernel_counter
@@ -10622,28 +10702,26 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
	struct perf_event *event;
	int err;

	/*
	 * Check if the requested attributes match a shared event
	 */
	event = perf_event_create_kernel_shared_check(attr, cpu,
				 task, overflow_handler, context);
	if (event)
		return event;

	/*
	 * Get the target context (task or percpu):
	 */

	event = perf_event_create_kernel_shared_check(attr, cpu, task,
						overflow_handler, NULL);
	if (!event) {
		event = perf_event_alloc(attr, cpu, task, NULL, NULL,
				overflow_handler, context, -1);
		if (IS_ERR(event)) {
			err = PTR_ERR(event);
			goto err;
		}
	}

	/* Mark owner so we could distinguish it from user events. */
	event->owner = TASK_TOMBSTONE;

	if (event->shared)
		return event;

	/*
	 * Get the target context (task or percpu):
	 */
	ctx = find_get_context(event->pmu, task, event);
	if (IS_ERR(ctx)) {
		err = PTR_ERR(ctx);