
Commit 3d3eb5fb authored by Raghavendra Rao Ananta, committed by Rishabh Bhatnagar

perf: add hotplug support



The change is a squash of the following four commits taken
from msm-4.14:

1) commit <b02d7648fbd1> ("enable perf to continue across
hotplug")
Currently perf hardware, software and tracepoint events are
deleted when a CPU is hotplugged out. This change restarts the
events after hotplug. In arm_pmu.c, most of the code for
handling power collapse is reused for hotplug. This change
supersedes commit 1f0f95c5fe9e ("perf: add hotplug support so
that perf continues after hotplug") and uses the new hotplug
notification method.
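
In essence, the new notification method registers cpuhp
callbacks and tracks hotplug state in a per-CPU flag. A
condensed sketch of the pattern, mirroring the arm64 hunk
below:

	static DEFINE_PER_CPU(bool, is_hotplugging);

	static int perf_event_hotplug_coming_up(unsigned int cpu)
	{
		/* CPU is back online; PMU accesses may resume */
		per_cpu(is_hotplugging, cpu) = false;
		return 0;
	}

	static int perf_event_hotplug_going_down(unsigned int cpu)
	{
		/* CPU is on its way out; stop touching its PMU */
		per_cpu(is_hotplugging, cpu) = true;
		return 0;
	}

	/* registered once at device-probe time */
	ret = cpuhp_setup_state_nocalls(CPUHP_AP_PERF_ONLINE,
			"PERF_EVENT/CPUHP_AP_PERF_ONLINE",
			perf_event_hotplug_coming_up,
			perf_event_hotplug_going_down);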

2) commit <9768e7af40d6> ("disable perf_event_read during hotplug")
core.c should not allow perf_event_read access while a CPU is
being hotplugged. DCVS may try to read events during hotplug
startup or shutdown, so set a per-CPU flag that blocks access
during hotplug.
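
The guard amounts to an early return keyed off that flag; a
condensed sketch of the check added to the read path (the full
hunks are in kernel/events/core.c below):

	static void __perf_event_read(void *info)
	{
		/* this CPU's context is unstable mid-hotplug; skip the read */
		if (__this_cpu_read(is_hotplugging))
			return;
		/* ... normal cross-CPU read path ... */
	}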

3) commit <1fc690b7b8c6> ("perf: core: Avoid race condition when
releasing perf-events")
Before doing the actual release work in
perf_event_release_kernel(), take perf's pmus_lock mutex to
prevent the CPU from going offline during the operation.
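
Concretely, the exported entry point becomes a locked wrapper
around the real release work, as the kernel/events/core.c hunk
below shows:

	int perf_event_release_kernel(struct perf_event *event)
	{
		int ret;

		/* holding pmus_lock keeps CPUs from going offline mid-release */
		mutex_lock(&pmus_lock);
		ret = __perf_event_release_kernel(event);
		mutex_unlock(&pmus_lock);

		return ret;
	}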

4) commit <77257e46efea> ("perf: Manage CPU hotplug events at
core level")
The approach here is to achieve the hotplug management at the
perf-core level. The idea is to detach the event from its
context (perf_remove_from_context()) when the CPU is about to
go down, and re-attach it (perf_install_in_context()) when the
CPU comes back online. This removes the logic for maintaining
zombie events (PERF_EVENT_STATE_ZOMBIE) and lets the dormant
list itself carry the events whose CPUs are offline.
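
Parked events live on a global dormant list; a condensed
sketch of the two halves (locking around the deferred
re-install is elided here, see the full hunk below):

	static LIST_HEAD(dormant_event_list);
	static DEFINE_SPINLOCK(dormant_event_list_lock);

	/* CPU going down: park the event instead of zombifying it */
	static void perf_prepare_install_in_context(struct perf_event *event)
	{
		spin_lock(&dormant_event_list_lock);
		if (event->state != PERF_EVENT_STATE_DORMANT) {
			event->state = PERF_EVENT_STATE_DORMANT;
			list_add_tail(&event->dormant_event_entry,
				      &dormant_event_list);
		}
		spin_unlock(&dormant_event_list_lock);
	}

	/* CPU back online: re-install every event parked for it */
	static void perf_deferred_install_in_context(int cpu)
	{
		struct perf_event *event, *tmp;

		list_for_each_entry_safe(event, tmp, &dormant_event_list,
					 dormant_event_entry) {
			if (event->cpu != cpu)
				continue;

			list_del(&event->dormant_event_entry);
			event->state = PERF_EVENT_STATE_INACTIVE;
			perf_install_in_context(event->ctx, event, cpu);
		}
	}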

Change-Id: I3738bdcaae2d2199ba3fb68ce77f637d867c5c14
Signed-off-by: Raghavendra Rao Ananta <rananta@codeaurora.org>
Signed-off-by: Rishabh Bhatnagar <rishabhb@codeaurora.org>
parent 7a0a9e67
arch/arm64/kernel/perf_event.c  +46 −3
@@ -30,6 +30,8 @@
#include <linux/perf/arm_pmu.h>
#include <linux/platform_device.h>

static DEFINE_PER_CPU(bool, is_hotplugging);

/*
 * ARMv8 PMUv3 Performance Events handling code.
 * Common event types (some are defined in asm/perf_event.h).
@@ -870,8 +872,8 @@ static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu)
		struct perf_event *event = cpuc->events[idx];
		struct hw_perf_event *hwc;

-		/* Ignore if we don't have an event. */
-		if (!event)
+		/* Ignore if we don't have an event */
+		if (!event || event->state != PERF_EVENT_STATE_ACTIVE)
			continue;

		/*
@@ -1146,6 +1148,9 @@ static void armv8pmu_idle_update(struct arm_pmu *cpu_pmu)
	if (!cpu_pmu)
		return;

	if (__this_cpu_read(is_hotplugging))
		return;

	hw_events = this_cpu_ptr(cpu_pmu->hw_events);

	if (!hw_events)
@@ -1377,6 +1382,37 @@ static const struct of_device_id armv8_pmu_of_device_ids[] = {
	{},
};

#ifdef CONFIG_HOTPLUG_CPU
static int perf_event_hotplug_coming_up(unsigned int cpu)
{
	per_cpu(is_hotplugging, cpu) = false;
	return 0;
}

static int perf_event_hotplug_going_down(unsigned int cpu)
{
	per_cpu(is_hotplugging, cpu) = true;
	return 0;
}

static int perf_event_cpu_hp_init(void)
{
	int ret;

	ret = cpuhp_setup_state_nocalls(CPUHP_AP_PERF_ONLINE,
				"PERF_EVENT/CPUHP_AP_PERF_ONLINE",
				perf_event_hotplug_coming_up,
				perf_event_hotplug_going_down);
	if (ret)
		pr_err("CPU hotplug notifier for perf_event.c could not be registered: %d\n",
		       ret);

	return ret;
}
#else
static int perf_event_cpu_hp_init(void) { return 0; }
#endif

/*
 * Non DT systems have their micro/arch events probed at run-time.
 * A fairly complete list of generic events are provided and ones that
@@ -1389,7 +1425,14 @@ static const struct pmu_probe_info armv8_pmu_probe_table[] = {

static int armv8_pmu_device_probe(struct platform_device *pdev)
{
-	int ret;
+	int ret, cpu;

	for_each_possible_cpu(cpu)
		per_cpu(is_hotplugging, cpu) = false;

	ret = perf_event_cpu_hp_init();
	if (ret)
		return ret;

	/* set to true so armv8pmu_idle_update doesn't try to load
	 * hw_events before arm_pmu_device_probe has initialized it.
drivers/perf/arm_pmu.c  +4 −0
@@ -675,6 +675,9 @@ static void cpu_pm_pmu_setup(struct arm_pmu *armpmu, unsigned long cmd)
		if (!event)
			continue;

		if (event->state != PERF_EVENT_STATE_ACTIVE)
			continue;

		switch (cmd) {
		case CPU_PM_ENTER:
			/*
@@ -821,6 +824,7 @@ static struct arm_pmu *__armpmu_alloc(gfp_t flags)
		 * validation).
		 */
		.capabilities	= PERF_PMU_CAP_HETEROGENEOUS_CPUS,
		.events_across_hotplug	= 1,
	};

	pmu->attr_groups[ARMPMU_ATTR_GROUP_COMMON] =
include/linux/perf_event.h  +12 −0
@@ -267,6 +267,8 @@ struct pmu {
	atomic_t			exclusive_cnt; /* < 0: cpu; > 0: tsk */
	int				task_ctx_nr;
	int				hrtimer_interval_ms;
	u32				events_across_hotplug:1,
					reserved:31;

	/* number of address filters this PMU can do */
	unsigned int			nr_addr_filters;
@@ -493,6 +495,7 @@ struct perf_addr_filters_head {
 * enum perf_event_state - the states of an event:
 */
enum perf_event_state {
	PERF_EVENT_STATE_DORMANT	= -5,
	PERF_EVENT_STATE_DEAD		= -4,
	PERF_EVENT_STATE_EXIT		= -3,
	PERF_EVENT_STATE_ERROR		= -2,
@@ -703,6 +706,13 @@ struct perf_event {
	struct list_head		sb_list;
	/* Is this event shared with other events */
	bool				shared;

	/*
	 * Entry into the list that holds the events whose CPUs
	 * are offline. These events will be installed once the
	 * CPU wakes up and will be removed from the list after that
	 */
	struct list_head		dormant_event_entry;
#endif /* CONFIG_PERF_EVENTS */
};

@@ -1419,9 +1429,11 @@ static struct device_attribute format_attr_##_name = __ATTR_RO(_name)
#ifdef CONFIG_PERF_EVENTS
int perf_event_init_cpu(unsigned int cpu);
int perf_event_exit_cpu(unsigned int cpu);
int perf_event_restart_events(unsigned int cpu);
#else
#define perf_event_init_cpu	NULL
#define perf_event_exit_cpu	NULL
#define perf_event_restart_events NULL
#endif

#endif /* _LINUX_PERF_EVENT_H */
kernel/cpu.c  +1 −1
@@ -1431,7 +1431,7 @@ static struct cpuhp_step cpuhp_hp_states[] = {
	},
	[CPUHP_AP_PERF_ONLINE] = {
		.name			= "perf:online",
-		.startup.single		= perf_event_init_cpu,
+		.startup.single		= perf_event_restart_events,
		.teardown.single	= perf_event_exit_cpu,
	},
	[CPUHP_AP_WATCHDOG_ONLINE] = {
kernel/events/core.c  +111 −47
@@ -405,6 +405,7 @@ static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
static DEFINE_PER_CPU(int, perf_sched_cb_usages);
static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);
static DEFINE_PER_CPU(bool, is_idle);
static DEFINE_PER_CPU(bool, is_hotplugging);

static atomic_t nr_mmap_events __read_mostly;
static atomic_t nr_comm_events __read_mostly;
@@ -2511,6 +2512,23 @@ static void ctx_resched(struct perf_cpu_context *cpuctx,
	perf_pmu_enable(cpuctx->ctx.pmu);
}

#if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE
static LIST_HEAD(dormant_event_list);
static DEFINE_SPINLOCK(dormant_event_list_lock);

static void perf_prepare_install_in_context(struct perf_event *event)
{
	spin_lock(&dormant_event_list_lock);
	if (event->state == PERF_EVENT_STATE_DORMANT)
		goto out;

	event->state = PERF_EVENT_STATE_DORMANT;
	list_add_tail(&event->dormant_event_entry, &dormant_event_list);
out:
	spin_unlock(&dormant_event_list_lock);
}
#endif

/*
 * Cross CPU call to install and enable a performance event
 *
@@ -2599,6 +2617,13 @@ perf_install_in_context(struct perf_event_context *ctx,
	 */
	smp_store_release(&event->ctx, ctx);

#if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE
	if (per_cpu(is_hotplugging, cpu)) {
		perf_prepare_install_in_context(event);
		return;
	}
#endif

	if (!task) {
		cpu_function_call(cpu, __perf_install_in_context, event);
		return;
@@ -2668,6 +2693,34 @@ perf_install_in_context(struct perf_event_context *ctx,
	raw_spin_unlock_irq(&ctx->lock);
}

#if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE
static void perf_deferred_install_in_context(int cpu)
{
	struct perf_event *event, *tmp;
	struct perf_event_context *ctx;

	spin_lock(&dormant_event_list_lock);
	list_for_each_entry_safe(event, tmp, &dormant_event_list,
						dormant_event_entry) {
		if (cpu != event->cpu)
			continue;

		list_del(&event->dormant_event_entry);
		event->state = PERF_EVENT_STATE_INACTIVE;
		spin_unlock(&dormant_event_list_lock);

		ctx = event->ctx;

		mutex_lock(&ctx->mutex);
		perf_install_in_context(ctx, event, cpu);
		mutex_unlock(&ctx->mutex);

		spin_lock(&dormant_event_list_lock);
	}
	spin_unlock(&dormant_event_list_lock);
}
#endif

/*
 * Cross CPU call to enable a performance event
 */
@@ -3874,6 +3927,9 @@ static void __perf_event_read(void *info)
	struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
	struct pmu *pmu = event->pmu;

	if (__this_cpu_read(is_hotplugging))
		return;

	/*
	 * If this is a task context, we need to check whether it is
	 * the current task context of this cpu.  If not it has been
@@ -4028,7 +4084,8 @@ static int perf_event_read(struct perf_event *event, bool group)
			return 0;
		if (cpu_isolated(event_cpu) ||
			(event->attr.exclude_idle &&
-				per_cpu(is_idle, event_cpu)))
+				per_cpu(is_idle, event_cpu)) ||
+				per_cpu(is_hotplugging, event_cpu))
			active_event_skip_read = true;
	}
	if (state == PERF_EVENT_STATE_ACTIVE &&
@@ -4057,7 +4114,8 @@ static int perf_event_read(struct perf_event *event, bool group)
		preempt_enable();
		ret = data.ret;
	} else if (state == PERF_EVENT_STATE_INACTIVE ||
-			active_event_skip_read) {
+			(active_event_skip_read &&
+			!per_cpu(is_hotplugging, event_cpu))) {
		struct perf_event_context *ctx = event->ctx;
		unsigned long flags;

@@ -4609,12 +4667,21 @@ static void put_event(struct perf_event *event)
 * object, it will not preserve its functionality. Once the last 'user'
 * gives up the object, we'll destroy the thing.
 */
-int perf_event_release_kernel(struct perf_event *event)
+static int __perf_event_release_kernel(struct perf_event *event)
{
	struct perf_event_context *ctx = event->ctx;
	struct perf_event *child, *tmp;
	LIST_HEAD(free_list);

#if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE
	if (event->cpu != -1) {
		spin_lock(&dormant_event_list_lock);
		if (event->state == PERF_EVENT_STATE_DORMANT)
			list_del(&event->dormant_event_entry);
		spin_unlock(&dormant_event_list_lock);
	}
#endif

	/*
	 * If we got here through err_file: fput(event_file); we will not have
	 * attached to a context yet.
@@ -4721,6 +4788,17 @@ int perf_event_release_kernel(struct perf_event *event)
	put_event(event); /* Must be the 'last' reference */
	return 0;
}

int perf_event_release_kernel(struct perf_event *event)
{
	int ret;

	mutex_lock(&pmus_lock);
	ret = __perf_event_release_kernel(event);
	mutex_unlock(&pmus_lock);

	return ret;
}
EXPORT_SYMBOL_GPL(perf_event_release_kernel);

/*
@@ -4937,6 +5015,15 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
	struct perf_event_context *ctx;
	int ret;

#if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE
	spin_lock(&dormant_event_list_lock);
	if (event->state == PERF_EVENT_STATE_DORMANT) {
		spin_unlock(&dormant_event_list_lock);
		return 0;
	}
	spin_unlock(&dormant_event_list_lock);
#endif

	ctx = perf_event_ctx_lock(event);
	ret = __perf_read(event, buf, count);
	perf_event_ctx_unlock(event, ctx);
@@ -8306,6 +8393,7 @@ static struct pmu perf_swevent = {
	.start		= perf_swevent_start,
	.stop		= perf_swevent_stop,
	.read		= perf_swevent_read,
	.events_across_hotplug = 1,
};

#ifdef CONFIG_EVENT_TRACING
@@ -8450,6 +8538,7 @@ static struct pmu perf_tracepoint = {
	.start		= perf_swevent_start,
	.stop		= perf_swevent_stop,
	.read		= perf_swevent_read,
	.events_across_hotplug = 1,
};

#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
@@ -9349,6 +9438,7 @@ static struct pmu perf_cpu_clock = {
	.start		= cpu_clock_event_start,
	.stop		= cpu_clock_event_stop,
	.read		= cpu_clock_event_read,
	.events_across_hotplug = 1,
};

/*
@@ -9430,6 +9520,7 @@ static struct pmu perf_task_clock = {
	.start		= task_clock_event_start,
	.stop		= task_clock_event_stop,
	.read		= task_clock_event_read,
	.events_across_hotplug = 1,
};

static void perf_pmu_nop_void(struct pmu *pmu)
@@ -10140,6 +10231,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
	mutex_init(&event->child_mutex);
	INIT_LIST_HEAD(&event->child_list);

	INIT_LIST_HEAD(&event->dormant_event_entry);
	INIT_LIST_HEAD(&event->event_entry);
	INIT_LIST_HEAD(&event->sibling_list);
	INIT_LIST_HEAD(&event->active_list);
@@ -10911,23 +11003,6 @@ SYSCALL_DEFINE5(perf_event_open,
		goto err_locked;
	}

-	if (!task) {
-		/*
-		 * Check if the @cpu we're creating an event for is online.
-		 *
-		 * We use the perf_cpu_context::ctx::mutex to serialize against
-		 * the hotplug notifiers. See perf_event_{init,exit}_cpu().
-		 */
-		struct perf_cpu_context *cpuctx =
-			container_of(ctx, struct perf_cpu_context, ctx);
-
-		if (!cpuctx->online) {
-			err = -ENODEV;
-			goto err_locked;
-		}
-	}


	/*
	 * Must be under the same ctx::mutex as perf_install_in_context(),
	 * because we need to serialize with concurrent event creation.
@@ -11130,21 +11205,6 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
		goto err_unlock;
	}

-	if (!task) {
-		/*
-		 * Check if the @cpu we're creating an event for is online.
-		 *
-		 * We use the perf_cpu_context::ctx::mutex to serialize against
-		 * the hotplug notifiers. See perf_event_{init,exit}_cpu().
-		 */
-		struct perf_cpu_context *cpuctx =
-			container_of(ctx, struct perf_cpu_context, ctx);
-		if (!cpuctx->online) {
-			err = -ENODEV;
-			goto err_unlock;
-		}
-	}

	if (!exclusive_event_installable(event, ctx)) {
		err = -EBUSY;
		goto err_unlock;
@@ -11853,6 +11913,8 @@ static void __init perf_event_init_all_cpus(void)
		INIT_LIST_HEAD(&per_cpu(cgrp_cpuctx_list, cpu));
#endif
		INIT_LIST_HEAD(&per_cpu(sched_cb_list, cpu));
		per_cpu(is_hotplugging, cpu) = false;
		per_cpu(is_idle, cpu) = false;
	}
}

@@ -11872,32 +11934,35 @@ void perf_swevent_init_cpu(unsigned int cpu)
}

#if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE
-static void __perf_event_exit_context(void *__info)
+int perf_event_restart_events(unsigned int cpu)
{
-	struct perf_event_context *ctx = __info;
-	struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
-	struct perf_event *event;
+	mutex_lock(&pmus_lock);
+	per_cpu(is_hotplugging, cpu) = false;
+	perf_deferred_install_in_context(cpu);
+	mutex_unlock(&pmus_lock);

-	raw_spin_lock(&ctx->lock);
-	ctx_sched_out(ctx, cpuctx, EVENT_TIME);
-	list_for_each_entry(event, &ctx->event_list, event_entry)
-		__perf_remove_from_context(event, cpuctx, ctx, (void *)DETACH_GROUP);
-	raw_spin_unlock(&ctx->lock);
+	return 0;
}

static void perf_event_exit_cpu_context(int cpu)
{
	struct perf_cpu_context *cpuctx;
	struct perf_event_context *ctx;
	struct perf_event *event, *event_tmp;
	struct pmu *pmu;

	mutex_lock(&pmus_lock);
	per_cpu(is_hotplugging, cpu) = true;
	list_for_each_entry(pmu, &pmus, entry) {
		cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
		ctx = &cpuctx->ctx;

		mutex_lock(&ctx->mutex);
-		smp_call_function_single(cpu, __perf_event_exit_context, ctx, 1);
		list_for_each_entry_safe(event, event_tmp, &ctx->event_list,
								event_entry) {
			perf_remove_from_context(event, DETACH_GROUP);
			if (event->pmu->events_across_hotplug)
				perf_prepare_install_in_context(event);
		}
		cpuctx->online = 0;
		mutex_unlock(&ctx->mutex);
	}
@@ -11978,7 +12043,6 @@ static struct notifier_block perf_event_idle_nb = {
	.notifier_call = event_idle_notif,
};


void __init perf_event_init(void)
{
	int ret, cpu;