Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit a7db37ad authored by Raghavendra Rao Ananta
Browse files

perf: add hotplug support



The change is a squash of the following two commits:

1) enable perf to continue across hotplug:
Currently perf hardware, software and tracepoint events are
deleted when a cpu is hotplugged out. This change restarts
the events after hotplug. In arm_pmu.c most of the code
for handling power collapse is reused for hotplug.
This change supersedes commit 1f0f95c5fe9e ("perf: add hotplug
support so that perf continues after hotplug") and uses the
new hotplug notification method.

2) disable perf_event_read during hotplug:
core.c should not allow perf_event_read access during hotplug.
DCVS may try to read events during hotplug startup or
shutdown. Set a flag to not allow access during hotplug.

Change-Id: I3c5f1f532d451a096d2d3ee976e0a15fca826e8b
Signed-off-by: Patrick Fay <pfay@codeaurora.org>
[rananta@codeaurora.org: resolved trivial conflicts]
Signed-off-by: Raghavendra Rao Ananta <rananta@codeaurora.org>
parent e0a2db21
Loading
Loading
Loading
Loading
+44 −2
Original line number Diff line number Diff line
@@ -29,6 +29,8 @@
#include <linux/perf/arm_pmu.h>
#include <linux/platform_device.h>

static DEFINE_PER_CPU(bool, is_hotplugging);

/*
 * ARMv8 PMUv3 Performance Events handling code.
 * Common event types (some are defined in asm/perf_event.h).
@@ -942,6 +944,9 @@ static void armv8pmu_idle_update(struct arm_pmu *cpu_pmu)
	if (!cpu_pmu)
		return;

	if (__this_cpu_read(is_hotplugging))
		return;

	hw_events = this_cpu_ptr(cpu_pmu->hw_events);

	if (!hw_events)
@@ -995,7 +1000,6 @@ static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu)

	pmu_idle_nb->cpu_pmu = cpu_pmu;
	pmu_idle_nb->perf_cpu_idle_nb.notifier_call = perf_cpu_idle_notifier;
	idle_notifier_register(&pmu_idle_nb->perf_cpu_idle_nb);

	ret = smp_call_function_any(&cpu_pmu->supported_cpus,
				    __armv8pmu_probe_pmu,
@@ -1176,6 +1180,37 @@ static const struct of_device_id armv8_pmu_of_device_ids[] = {
	{},
};

#ifdef CONFIG_HOTPLUG_CPU
static int perf_event_hotplug_coming_up(unsigned int cpu)
{
	per_cpu(is_hotplugging, cpu) = false;
	return 0;
}

static int perf_event_hotplug_going_down(unsigned int cpu)
{
	per_cpu(is_hotplugging, cpu) = true;
	return 0;
}

/*
 * Install perf_event_hotplug_coming_up()/perf_event_hotplug_going_down()
 * as the startup/teardown callbacks of the CPUHP_AP_PERF_ONLINE state.
 *
 * Returns 0 on success, otherwise the error reported by
 * cpuhp_setup_state_nocalls() (which is also logged).
 *
 * NOTE(review): perf core's perf_cpu_hp_init() registers callbacks on the
 * same CPUHP_AP_PERF_ONLINE state - confirm the hotplug core accepts two
 * registrations for one state.
 */
static int perf_event_cpu_hp_init(void)
{
	int err = cpuhp_setup_state_nocalls(CPUHP_AP_PERF_ONLINE,
					    "PERF_EVENT/CPUHP_AP_PERF_ONLINE",
					    perf_event_hotplug_coming_up,
					    perf_event_hotplug_going_down);

	if (!err)
		return 0;

	pr_err("CPU hotplug notifier for perf_event.c could not be registered: %d\n",
	       err);
	return err;
}
#else
static int perf_event_cpu_hp_init(void) { return 0; }
#endif

/*
 * Non DT systems have their micro/arch events probed at run-time.
 * A fairly complete list of generic events are provided and ones that
@@ -1188,7 +1223,14 @@ static const struct pmu_probe_info armv8_pmu_probe_table[] = {

static int armv8_pmu_device_probe(struct platform_device *pdev)
{
	int ret;
	int ret, cpu;

	for_each_possible_cpu(cpu)
		per_cpu(is_hotplugging, cpu) = false;

	ret = perf_event_cpu_hp_init();
	if (ret)
		return ret;

	/* set to true so armv8pmu_idle_update doesn't try to load
	 * hw_events before arm_pmu_device_probe has initialized it.
+134 −74
Original line number Diff line number Diff line
@@ -26,6 +26,9 @@

#include <asm/irq_regs.h>

#define USE_CPUHP_STATE CPUHP_AP_PERF_ARM_STARTING
#define USE_CPUHP_STR "AP_PERF_ARM_STARTING"

static int
armpmu_map_cache_event(const unsigned (*cache_map)
				      [PERF_COUNT_HW_CACHE_MAX]
@@ -539,13 +542,18 @@ void armpmu_free_irq(struct arm_pmu *armpmu, int cpu)
	if (!cpumask_test_and_clear_cpu(cpu, &armpmu->active_irqs))
		return;

	armpmu->pmu_state = ARM_PMU_STATE_GOING_DOWN;

	if (irq_is_percpu(irq)) {
		free_percpu_irq(irq, &hw_events->percpu_pmu);
		cpumask_clear(&armpmu->active_irqs);
		armpmu->percpu_irq = -1;
		armpmu->pmu_state = ARM_PMU_STATE_OFF;
		return;
	}

	free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, cpu));
	armpmu->pmu_state = ARM_PMU_STATE_OFF;
}

void armpmu_free_irqs(struct arm_pmu *armpmu)
@@ -568,6 +576,7 @@ int armpmu_request_irq(struct arm_pmu *armpmu, int cpu)
	if (irq_is_percpu(irq) && cpumask_empty(&armpmu->active_irqs)) {
		err = request_percpu_irq(irq, handler, "arm-pmu",
					 &hw_events->percpu_pmu);
		armpmu->percpu_irq = irq;
	} else if (irq_is_percpu(irq)) {
		int other_cpu = cpumask_first(&armpmu->active_irqs);
		int other_irq = per_cpu(hw_events->irq, other_cpu);
@@ -604,6 +613,8 @@ int armpmu_request_irq(struct arm_pmu *armpmu, int cpu)
	if (err)
		goto err_out;

	armpmu->pmu_state = ARM_PMU_STATE_RUNNING;

	cpumask_set_cpu(cpu, &armpmu->active_irqs);
	return 0;

@@ -625,53 +636,12 @@ int armpmu_request_irqs(struct arm_pmu *armpmu)
	return err;
}

static int armpmu_get_cpu_irq(struct arm_pmu *pmu, int cpu)
{
	struct pmu_hw_events __percpu *hw_events = pmu->hw_events;
	return per_cpu(hw_events->irq, cpu);
}

/*
 * PMU hardware loses all context when a CPU goes offline.
 * When a CPU is hotplugged back in, since some hardware registers are
 * UNKNOWN at reset, the PMU must be explicitly reset to avoid reading
 * junk values out of them.
 */
static int arm_perf_starting_cpu(unsigned int cpu, struct hlist_node *node)
{
	struct arm_pmu *pmu = hlist_entry_safe(node, struct arm_pmu, node);
	int irq;

	if (!cpumask_test_cpu(cpu, &pmu->supported_cpus))
		return 0;
	if (pmu->reset)
		pmu->reset(pmu);

	irq = armpmu_get_cpu_irq(pmu, cpu);
	if (irq) {
		if (irq_is_percpu(irq)) {
			enable_percpu_irq(irq, IRQ_TYPE_NONE);
			return 0;
		}
	}

	return 0;
}

static int arm_perf_teardown_cpu(unsigned int cpu, struct hlist_node *node)
{
	struct arm_pmu *pmu = hlist_entry_safe(node, struct arm_pmu, node);
	int irq;

	if (!cpumask_test_cpu(cpu, &pmu->supported_cpus))
		return 0;

	irq = armpmu_get_cpu_irq(pmu, cpu);
	if (irq && irq_is_percpu(irq))
		disable_percpu_irq(irq);

	return 0;
}
/*
 * Request/result bundle passed (as a void pointer) to cpu_pm_pmu_common().
 * Callers fill in armpmu, cmd and cpu; cpu_pm_pmu_common() writes ret.
 */
struct cpu_pm_pmu_args {
	struct arm_pmu	*armpmu;	/* PMU instance to operate on */
	unsigned long	cmd;		/* CPU_PM_* command to apply */
	int		cpu;		/* CPU the request is issued for */
	int		ret;		/* NOTIFY_* result, set by cpu_pm_pmu_common() */
};

#ifdef CONFIG_CPU_PM
static void cpu_pm_pmu_setup(struct arm_pmu *armpmu, unsigned long cmd)
@@ -719,15 +689,19 @@ static void cpu_pm_pmu_setup(struct arm_pmu *armpmu, unsigned long cmd)
	}
}

static int cpu_pm_pmu_notify(struct notifier_block *b, unsigned long cmd,
			     void *v)
static void cpu_pm_pmu_common(void *info)
{
	struct arm_pmu *armpmu = container_of(b, struct arm_pmu, cpu_pm_nb);
	struct cpu_pm_pmu_args *data	= info;
	struct arm_pmu *armpmu		= data->armpmu;
	unsigned long cmd		= data->cmd;
	int cpu				= data->cpu;
	struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
	int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events);

	if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
		return NOTIFY_DONE;
	if (!cpumask_test_cpu(cpu, &armpmu->supported_cpus)) {
		data->ret = NOTIFY_DONE;
		return;
	}

	/*
	 * Always reset the PMU registers on power-up even if
@@ -736,8 +710,12 @@ static int cpu_pm_pmu_notify(struct notifier_block *b, unsigned long cmd,
	if (cmd == CPU_PM_EXIT && armpmu->reset)
		armpmu->reset(armpmu);

	if (!enabled)
		return NOTIFY_OK;
	if (!enabled) {
		data->ret = NOTIFY_OK;
		return;
	}

	data->ret = NOTIFY_OK;

	switch (cmd) {
	case CPU_PM_ENTER:
@@ -745,15 +723,29 @@ static int cpu_pm_pmu_notify(struct notifier_block *b, unsigned long cmd,
		cpu_pm_pmu_setup(armpmu, cmd);
		break;
	case CPU_PM_EXIT:
		cpu_pm_pmu_setup(armpmu, cmd);
	case CPU_PM_ENTER_FAILED:
		cpu_pm_pmu_setup(armpmu, cmd);
		armpmu->start(armpmu);
		break;
	default:
		return NOTIFY_DONE;
		data->ret = NOTIFY_DONE;
		break;
	}

	return NOTIFY_OK;
	return;
}

static int cpu_pm_pmu_notify(struct notifier_block *b, unsigned long cmd,
			     void *v)
{
	struct cpu_pm_pmu_args data = {
		.armpmu	= container_of(b, struct arm_pmu, cpu_pm_nb),
		.cmd	= cmd,
		.cpu	= smp_processor_id(),
	};

	cpu_pm_pmu_common(&data);
	return data.ret;
}

static int cpu_pm_pmu_register(struct arm_pmu *cpu_pmu)
@@ -766,11 +758,75 @@ static void cpu_pm_pmu_unregister(struct arm_pmu *cpu_pmu)
{
	cpu_pm_unregister_notifier(&cpu_pmu->cpu_pm_nb);
}

#else
static inline int cpu_pm_pmu_register(struct arm_pmu *cpu_pmu) { return 0; }
static inline void cpu_pm_pmu_unregister(struct arm_pmu *cpu_pmu) { }
static void cpu_pm_pmu_common(void *info) { }
#endif

/*
 * PMU hardware loses all context when a CPU goes offline.
 * When a CPU is hotplugged back in, since some hardware registers are
 * UNKNOWN at reset, the PMU must be explicitly reset to avoid reading
 * junk values out of them.
 */
static int arm_perf_starting_cpu(unsigned int cpu, struct hlist_node *node)
{
	struct arm_pmu *pmu = hlist_entry_safe(node, struct arm_pmu, node);

	struct cpu_pm_pmu_args data = {
		.armpmu	= pmu,
		.cpu	= (int)cpu,
	};

	if (!pmu || !cpumask_test_cpu(cpu, &pmu->supported_cpus))
		return 0;

	data.cmd    = CPU_PM_EXIT;
	cpu_pm_pmu_common(&data);
	if (data.ret == NOTIFY_DONE)
		return 0;

	if (data.armpmu->pmu_state != ARM_PMU_STATE_OFF &&
		data.armpmu->plat_device) {
		int irq = data.armpmu->percpu_irq;

		if (irq > 0 && irq_is_percpu(irq))
			enable_percpu_irq(irq, IRQ_TYPE_NONE);

	}

	return 0;
}

static int arm_perf_stopping_cpu(unsigned int cpu, struct hlist_node *node)
{
	struct arm_pmu *pmu = hlist_entry_safe(node, struct arm_pmu, node);

	struct cpu_pm_pmu_args data = {
		.armpmu	= pmu,
		.cpu	= (int)cpu,
	};

	if (!pmu || !cpumask_test_cpu(cpu, &pmu->supported_cpus))
		return 0;

	data.cmd = CPU_PM_ENTER;
	cpu_pm_pmu_common(&data);
	/* Disarm the PMU IRQ before disappearing. */
	if (data.armpmu->pmu_state == ARM_PMU_STATE_RUNNING &&
		data.armpmu->plat_device) {
		int irq = data.armpmu->percpu_irq;

		if (irq > 0 && irq_is_percpu(irq))
			disable_percpu_irq(irq);

	}

	return 0;
}

static int cpu_pmu_init(struct arm_pmu *cpu_pmu)
{
	int err;
@@ -782,12 +838,12 @@ static int cpu_pmu_init(struct arm_pmu *cpu_pmu)

	err = cpu_pm_pmu_register(cpu_pmu);
	if (err)
		goto out_unregister;
		goto out_unreg_perf_starting;

	return 0;

out_unregister:
	cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_STARTING,
out_unreg_perf_starting:
	cpuhp_state_remove_instance_nocalls(USE_CPUHP_STATE,
					    &cpu_pmu->node);
out:
	return err;
@@ -796,7 +852,7 @@ static int cpu_pmu_init(struct arm_pmu *cpu_pmu)
static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu)
{
	cpu_pm_pmu_unregister(cpu_pmu);
	cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_STARTING,
	cpuhp_state_remove_instance_nocalls(USE_CPUHP_STATE,
					    &cpu_pmu->node);
}

@@ -836,6 +892,7 @@ struct arm_pmu *armpmu_alloc(void)
		 * validation).
		 */
		.capabilities		= PERF_PMU_CAP_HETEROGENEOUS_CPUS,
		.events_across_hotplug	= 1,
	};

	pmu->attr_groups[ARMPMU_ATTR_GROUP_COMMON] =
@@ -878,6 +935,9 @@ int armpmu_register(struct arm_pmu *pmu)
	if (!__oprofile_cpu_pmu)
		__oprofile_cpu_pmu = pmu;

	pmu->pmu_state  = ARM_PMU_STATE_OFF;
	pmu->percpu_irq = -1;

	pr_info("enabled with %s PMU driver, %d counters available\n",
		pmu->name, pmu->num_events);

@@ -895,9 +955,9 @@ static int arm_pmu_hp_init(void)
	ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_STARTING,
				      "perf/arm/pmu:starting",
				      arm_perf_starting_cpu,
				      arm_perf_teardown_cpu);
				      arm_perf_stopping_cpu);
	if (ret)
		pr_err("CPU hotplug notifier for ARM PMU could not be registered: %d\n",
		pr_err("CPU hotplug ARM PMU STOPPING registering failed: %d\n",
		       ret);
	return ret;
}
+8 −0
Original line number Diff line number Diff line
@@ -90,6 +90,12 @@ enum armpmu_attr_groups {
	ARMPMU_NR_ATTR_GROUPS
};

/* Values stored in arm_pmu::pmu_state. */
enum armpmu_pmu_states {
	/* Set by armpmu_register() and again once armpmu_free_irq() is done. */
	ARM_PMU_STATE_OFF,
	/* Set by armpmu_request_irq() after the IRQ was requested successfully. */
	ARM_PMU_STATE_RUNNING,
	/* Set by armpmu_free_irq() while it is releasing the IRQ. */
	ARM_PMU_STATE_GOING_DOWN,
};

struct arm_pmu {
	struct pmu	pmu;
	cpumask_t	active_irqs;
@@ -111,6 +117,8 @@ struct arm_pmu {
	void		(*reset)(void *);
	int		(*map_event)(struct perf_event *event);
	int		num_events;
	int		pmu_state;
	int		percpu_irq;
	u64		max_period;
	bool		secure_access; /* 32-bit ARM only */
#define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40
+2 −0
Original line number Diff line number Diff line
@@ -266,6 +266,8 @@ struct pmu {
	atomic_t			exclusive_cnt; /* < 0: cpu; > 0: tsk */
	int				task_ctx_nr;
	int				hrtimer_interval_ms;
	u32				events_across_hotplug:1,
					reserved:31;

	/* number of address filters this PMU can do */
	unsigned int			nr_addr_filters;
+89 −3
Original line number Diff line number Diff line
@@ -379,6 +379,7 @@ static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
static DEFINE_PER_CPU(int, perf_sched_cb_usages);
static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);
static DEFINE_PER_CPU(bool, is_idle);
static DEFINE_PER_CPU(bool, is_hotplugging);

static atomic_t nr_mmap_events __read_mostly;
static atomic_t nr_comm_events __read_mostly;
@@ -3632,6 +3633,9 @@ static void __perf_event_read(void *info)
	struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
	struct pmu *pmu = event->pmu;

	if (__this_cpu_read(is_hotplugging))
		return;

	/*
	 * If this is a task context, we need to check whether it is
	 * the current task context of this cpu.  If not it has been
@@ -3757,7 +3761,8 @@ static int perf_event_read(struct perf_event *event, bool group)
			return 0;
		if (cpu_isolated(event_cpu) ||
			(event->attr.exclude_idle &&
				per_cpu(is_idle, event_cpu)))
				per_cpu(is_idle, event_cpu)) ||
				per_cpu(is_hotplugging, event_cpu))
			active_event_skip_read = true;
	}

@@ -3787,7 +3792,8 @@ static int perf_event_read(struct perf_event *event, bool group)
		preempt_enable();
		ret = data.ret;
	} else if (event->state == PERF_EVENT_STATE_INACTIVE ||
			active_event_skip_read) {
			(active_event_skip_read &&
			!per_cpu(is_hotplugging, event_cpu))) {
		struct perf_event_context *ctx = event->ctx;
		unsigned long flags;

@@ -7923,6 +7929,7 @@ static struct pmu perf_swevent = {
	.start		= perf_swevent_start,
	.stop		= perf_swevent_stop,
	.read		= perf_swevent_read,
	.events_across_hotplug = 1,
};

#ifdef CONFIG_EVENT_TRACING
@@ -8072,6 +8079,7 @@ static struct pmu perf_tracepoint = {
	.start		= perf_swevent_start,
	.stop		= perf_swevent_stop,
	.read		= perf_swevent_read,
	.events_across_hotplug = 1,
};

static inline void perf_tp_register(void)
@@ -8816,6 +8824,7 @@ static struct pmu perf_cpu_clock = {
	.start		= cpu_clock_event_start,
	.stop		= cpu_clock_event_stop,
	.read		= cpu_clock_event_read,
	.events_across_hotplug = 1,
};

/*
@@ -8897,6 +8906,7 @@ static struct pmu perf_task_clock = {
	.start		= task_clock_event_start,
	.stop		= task_clock_event_stop,
	.read		= task_clock_event_read,
	.events_across_hotplug = 1,
};

static void perf_pmu_nop_void(struct pmu *pmu)
@@ -11098,6 +11108,8 @@ static void __init perf_event_init_all_cpus(void)
		INIT_LIST_HEAD(&per_cpu(cgrp_cpuctx_list, cpu));
#endif
		INIT_LIST_HEAD(&per_cpu(sched_cb_list, cpu));
		per_cpu(is_hotplugging, cpu) = false;
		per_cpu(is_idle, cpu) = false;
	}
}

@@ -11117,6 +11129,59 @@ void perf_swevent_init_cpu(unsigned int cpu)
}

#if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE
static void
check_hotplug_start_event(struct perf_event *event)
{
	if (event->pmu->events_across_hotplug &&
	    event->attr.type == PERF_TYPE_SOFTWARE &&
	    event->pmu->start)
		event->pmu->start(event, 0);
}

/*
 * Hotplug startup callback: walk the per-CPU context of every registered
 * PMU for @cpu and restart the software events that were kept across the
 * hotplug cycle (see check_hotplug_start_event()), then clear the CPU's
 * is_hotplugging flag.
 *
 * Always returns 0.
 */
static int perf_event_start_swevents(unsigned int cpu)
{
	struct perf_event_context *ctx;
	struct pmu *pmu;
	struct perf_event *event;
	unsigned long flags;
	int idx;

	idx = srcu_read_lock(&pmus_srcu);
	list_for_each_entry_rcu(pmu, &pmus, entry) {
		ctx = &per_cpu_ptr(pmu->pmu_cpu_context, cpu)->ctx;
		mutex_lock(&ctx->mutex);
		/*
		 * ctx->mutex was just taken, so this is sleepable context with
		 * interrupts enabled. ctx->lock is also taken with a plain
		 * raw_spin_lock() in __perf_event_exit_context(), whose void *
		 * info signature suggests a cross-CPU call run with IRQs off, so
		 * interrupts must be disabled while it is held here to avoid a
		 * self-deadlock.
		 */
		raw_spin_lock_irqsave(&ctx->lock, flags);
		list_for_each_entry(event, &ctx->event_list, event_entry)
			check_hotplug_start_event(event);
		raw_spin_unlock_irqrestore(&ctx->lock, flags);
		mutex_unlock(&ctx->mutex);
	}
	srcu_read_unlock(&pmus_srcu, idx);

	/* Events are running again; allow reads on this CPU. */
	per_cpu(is_hotplugging, cpu) = false;
	return 0;
}

/*
 * If keeping events across hotplugging is supported, do not
 * remove the event list so event lives beyond CPU hotplug.
 * The context is exited via an fd close path when userspace
 * is done and the target CPU is online. If software clock
 * event is active, then stop hrtimer associated with it.
 * Start the timer when the CPU comes back online.
 */
static void
check_hotplug_remove_from_context(struct perf_event *event,
			   struct perf_cpu_context *cpuctx,
			   struct perf_event_context *ctx)
{
	if (event->pmu->events_across_hotplug &&
	    event->attr.type == PERF_TYPE_SOFTWARE &&
	    event->pmu->stop)
		event->pmu->stop(event, PERF_EF_UPDATE);
	else if (!event->pmu->events_across_hotplug)
		__perf_remove_from_context(event, cpuctx,
			ctx, (void *)DETACH_GROUP);
}

static void __perf_event_exit_context(void *__info)
{
	struct perf_event_context *ctx = __info;
@@ -11125,7 +11190,7 @@ static void __perf_event_exit_context(void *__info)

	raw_spin_lock(&ctx->lock);
	list_for_each_entry(event, &ctx->event_list, event_entry)
		__perf_remove_from_context(event, cpuctx, ctx, (void *)DETACH_GROUP);
		check_hotplug_remove_from_context(event, cpuctx, ctx);
	raw_spin_unlock(&ctx->lock);
}

@@ -11179,6 +11244,7 @@ int perf_event_init_cpu(unsigned int cpu)

int perf_event_exit_cpu(unsigned int cpu)
{
	per_cpu(is_hotplugging, cpu) = true;
	perf_event_exit_cpu_context(cpu);
	return 0;
}
@@ -11222,6 +11288,24 @@ static struct notifier_block perf_event_idle_nb = {
	.notifier_call = event_idle_notif,
};

#ifdef CONFIG_HOTPLUG_CPU
/*
 * Install perf_event_start_swevents()/perf_event_exit_cpu() as the
 * startup/teardown callbacks of the CPUHP_AP_PERF_ONLINE state.
 *
 * Returns 0 on success, otherwise the error reported by
 * cpuhp_setup_state_nocalls() (which is also logged).
 */
static int perf_cpu_hp_init(void)
{
	int err = cpuhp_setup_state_nocalls(CPUHP_AP_PERF_ONLINE,
					    "PERF/CORE/CPUHP_AP_PERF_ONLINE",
					    perf_event_start_swevents,
					    perf_event_exit_cpu);

	if (!err)
		return 0;

	pr_err("CPU hotplug notifier for perf core could not be registered: %d\n",
	       err);
	return err;
}
#else
static int perf_cpu_hp_init(void) { return 0; }
#endif

void __init perf_event_init(void)
{
@@ -11238,6 +11322,8 @@ void __init perf_event_init(void)
	perf_event_init_cpu(smp_processor_id());
	idle_notifier_register(&perf_event_idle_nb);
	register_reboot_notifier(&perf_reboot_notifier);
	ret = perf_cpu_hp_init();
	WARN(ret, "core perf_cpu_hp_init() failed with: %d", ret);

	ret = init_hw_breakpoint();
	WARN(ret, "hw_breakpoint initialization failed with: %d", ret);