
Commit 913317fe authored by Sultan Alsawaf, committed by Pranav Vashi

memlat: Optimize perf event reads when possible



We can skip the locking and other overhead of perf_event_read_value()
when we know in advance that the perf event in question can be read from
the current CPU. This occurs when either the perf event permits reads
from CPUs other than the one it's on, or when the CPU doing the read is
the same CPU that owns the perf event.
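
Concretely, the new read path boils down to the decision below. This is
only a condensed sketch of the read_event() hunk in this commit: the
helper name is made up, and the IRQ-disabled window the real code uses
around the local-CPU check is omitted for brevity.

#include <linux/compiler.h>
#include <linux/perf_event.h>
#include <linux/smp.h>

static u64 read_counter(struct perf_event *pevent, bool any_cpu_readable)
{
	u64 total, enabled, running;

	if (any_cpu_readable ||
	    READ_ONCE(pevent->oncpu) == raw_smp_processor_id()) {
		/* Fast path: a local read needs no IPI and no ctx locking */
		if (perf_event_read_local(pevent, &total))
			return 0;
	} else {
		/* Slow path: pays the cross-CPU IPI and locking cost */
		total = perf_event_read_value(pevent, &enabled, &running);
	}

	return total;
}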

Our PMU drivers only set two possible values for `readable_on_cpus`:
CPU_MASK_ALL or nothing. As such, we can simply check for CPU_MASK_ALL
beforehand in order to determine if the perf event allows non-local
reads.
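
A minimal sketch of that setup-time check, assuming the downstream
readable_on_cpus cpumask our PMU drivers populate (the helper name is
illustrative; the real commit stores the result in
event_data.any_cpu_readable, see the set_events() hunk below):

#include <linux/cpumask.h>
#include <linux/perf_event.h>

static struct cpumask all_cpu_mask = CPU_MASK_ALL;

/* True when the PMU driver marked the event as readable from every CPU */
static bool event_any_cpu_readable(struct perf_event *pevent)
{
	return cpumask_equal(&pevent->readable_on_cpus, &all_cpu_mask);
}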

We can also reduce the scope of under_scm_call() now that we know which
CPU we're reading a perf event from: making the check per-CPU lowers its
false-positive rate.
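
The per-CPU tracking amounts to bumping a per-CPU atomic counter around
each SCM call and letting readers query a single CPU. A condensed sketch
of the scm.c side of this commit follows; the wrapper name and its
function-pointer parameter are illustrative, since the real code
open-codes the inc/dec in each __scm_call_armv8_* variant:

#include <linux/atomic.h>
#include <linux/percpu.h>
#include <linux/smp.h>

static DEFINE_PER_CPU(atomic_t, scm_call_count);

/* Mark the calling CPU as busy in SCM for the duration of the call */
static int scm_call_counted(int (*do_scm_call)(void))
{
	atomic_t *cnt = per_cpu_ptr(&scm_call_count, raw_smp_processor_id());
	int ret;

	atomic_inc(cnt);
	ret = do_scm_call();
	atomic_dec(cnt);

	return ret;
}

/* Callers can now ask about one specific CPU instead of the whole system */
bool under_scm_call(int cpu)
{
	return atomic_read(per_cpu_ptr(&scm_call_count, cpu));
}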

Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
Signed-off-by: Pranav Vashi <neobuddy89@gmail.com>
parent 5ebb06d3
+35 −8
@@ -49,6 +49,7 @@ enum ev_index {
 struct event_data {
 	struct perf_event *pevent;
 	unsigned long prev_count;
+	bool any_cpu_readable;
 };
 
 struct cpu_pmu_stats {
@@ -101,7 +102,37 @@ static inline unsigned long read_event(struct event_data *event)
 	if (!event->pevent)
 		return 0;
 
-	total = perf_event_read_value(event->pevent, &enabled, &running);
+	if (event->any_cpu_readable) {
+		if (perf_event_read_local(event->pevent, &total))
+			return 0;
+	} else {
+		unsigned int ev_cpu = READ_ONCE(event->pevent->oncpu);
+		bool local_read;
+		int ret;
+
+		if (ev_cpu >= nr_cpu_ids)
+			return 0;
+
+		local_irq_disable();
+		if ((local_read = (ev_cpu == raw_smp_processor_id())))
+			ret = perf_event_read_local(event->pevent, &total);
+		local_irq_enable();
+
+		if (!local_read) {
+			/*
+			 * Some SCM calls take very long (20+ ms), so the perf
+			 * event IPI could lag on the CPU running the SCM call.
+			 */
+			if (under_scm_call(ev_cpu))
+				return 0;
+
+			total = perf_event_read_value(event->pevent, &enabled,
+						      &running);
+		} else if (ret) {
+			return ret;
+		}
+	}
+
 	ev_count = total - event->prev_count;
 	event->prev_count = total;
 	return ev_count;
@@ -141,13 +172,6 @@ static void delete_events(struct cpu_pmu_stats *cpustats)
 {
 	int i;
 
-	/*
-	 * Some of SCM call is very heavy(+20ms) so perf IPI could
-	 * be stuck on the CPU which contributes long latency.
-	 */
-	if (under_scm_call())
-		return;
-
 	for (i = 0; i < ARRAY_SIZE(cpustats->events); i++) {
 		cpustats->events[i].prev_count = 0;
 		if (cpustats->events[i].pevent) {
@@ -193,6 +217,7 @@ static struct perf_event_attr *alloc_attr(void)
 
 static int set_events(struct cpu_grp_info *cpu_grp, int cpu)
 {
+	static struct cpumask all_cpu_mask = CPU_MASK_ALL;
 	struct perf_event *pevent;
 	struct perf_event_attr *attr;
 	int err, i;
@@ -216,6 +241,8 @@ static int set_events(struct cpu_grp_info *cpu_grp, int cpu)
 			goto err_out;
 		cpustats->events[i].pevent = pevent;
 		perf_event_enable(pevent);
+		cpustats->events[i].any_cpu_readable =
+			cpumask_equal(&pevent->readable_on_cpus, &all_cpu_mask);
 	}
 
 	kfree(attr);
+12 −9
@@ -36,7 +36,7 @@
 #define SCM_EBUSY		-55
 #define SCM_V2_EBUSY		-12
 
-static atomic_t scm_call_count = ATOMIC_INIT(0);
+static DEFINE_PER_CPU(atomic_t, scm_call_count);
 static DEFINE_MUTEX(scm_lock);
 
 /*
@@ -433,11 +433,12 @@ static int ___scm_call_armv8_64(u64 x0, u64 x1, u64 x2, u64 x3, u64 x4, u64 x5,
 static int __scm_call_armv8_64(u64 x0, u64 x1, u64 x2, u64 x3, u64 x4, u64 x5,
 				u64 *ret1, u64 *ret2, u64 *ret3)
 {
+	atomic_t *cnt = per_cpu_ptr(&scm_call_count, raw_smp_processor_id());
 	int ret;
 
-	atomic_inc(&scm_call_count);
+	atomic_inc(cnt);
 	ret = ___scm_call_armv8_64(x0, x1, x2, x3, x4, x5, ret1, ret2, ret3);
-	atomic_dec(&scm_call_count);
+	atomic_dec(cnt);
 
 	return ret;
 }
@@ -495,11 +496,12 @@ static int ___scm_call_armv8_32(u32 w0, u32 w1, u32 w2, u32 w3, u32 w4, u32 w5,
 static int __scm_call_armv8_32(u32 w0, u32 w1, u32 w2, u32 w3, u32 w4, u32 w5,
 				u64 *ret1, u64 *ret2, u64 *ret3)
 {
+	atomic_t *cnt = per_cpu_ptr(&scm_call_count, raw_smp_processor_id());
 	int ret;
 
-	atomic_inc(&scm_call_count);
+	atomic_inc(cnt);
 	ret = ___scm_call_armv8_32(w0, w1, w2, w3, w4, w5, ret1, ret2, ret3);
-	atomic_dec(&scm_call_count);
+	atomic_dec(cnt);
 
 	return ret;
 }
@@ -557,11 +559,12 @@ static int ___scm_call_armv8_32(u32 w0, u32 w1, u32 w2, u32 w3, u32 w4, u32 w5,
 static int __scm_call_armv8_32(u32 w0, u32 w1, u32 w2, u32 w3, u32 w4, u32 w5,
 				u64 *ret1, u64 *ret2, u64 *ret3)
 {
+	atomic_t *cnt = per_cpu_ptr(&scm_call_count, raw_smp_processor_id());
 	int ret;
 
-	atomic_inc(&scm_call_count);
+	atomic_inc(cnt);
 	ret = ___scm_call_armv8_32(w0, w1, w2, w3, w4, w5, ret1, ret2, ret3);
-	atomic_dec(&scm_call_count);
+	atomic_dec(cnt);
 
 	return ret;
 }
@@ -1352,7 +1355,7 @@ inline int scm_enable_mem_protection(void)
 #endif
 EXPORT_SYMBOL(scm_enable_mem_protection);
 
-bool under_scm_call(void)
+bool under_scm_call(int cpu)
 {
-	return atomic_read(&scm_call_count);
+	return atomic_read(per_cpu_ptr(&scm_call_count, cpu));
 }
+2 −2
@@ -124,7 +124,7 @@ struct scm_hdcp_req {
 };
 
 extern struct mutex scm_lmh_lock;
-extern bool under_scm_call(void);
+extern bool under_scm_call(int cpu);
 
 #else
 
@@ -188,7 +188,7 @@ static inline int scm_enable_mem_protection(void)
 	return 0;
 }
 
-extern bool under_scm_call(void)
+extern bool under_scm_call(int cpu)
 {
 	return false;
 }