Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 93c29d13 authored by Saravana Kannan, committed by Amir Vajid
Browse files

PM / devfreq: memlat: Look for min stall% in addition to ratio criteria



Some workloads doing memory access might appear memory latency bound even
though they might not actually be memory latency bound.

This error can happen when the core that's running the workload is very
parallelized or can do out-of-order execution, etc., so not all memory
accesses would actually stall the core.

This can also happen when the memory access monitoring capabilities
aren't ideal and end up counting more kinds of memory accesses than what
would be ideal. In this case, the IPM ratio can be lower than what it would
be if we had ideal monitoring capabilities.

To account for these errors, if the core has stall cycle counting
capabilities, check for a minimum stall% before the workload is considered
memory latency bound. This would help reduce the inaccuracies, but is not a
replacement for IPM ratio scheme because the stall% method doesn't allow us
to detect which level of memory the workload is latency bound on, but the
IPM ratio does (based on which memory accesses we use for calculating the
ratio).

Change-Id: I4363d7848584e5562f6683b5ad6b0f99017ec71b
Signed-off-by: Saravana Kannan <skannan@codeaurora.org>
[avajid@codeaurora.org: resolved minor merge conflicts and made minor styling changes]
Signed-off-by: Amir Vajid <avajid@codeaurora.org>
parent 6b881f24
Loading
Loading
Loading
Loading
+27 −3
Original line number Diff line number Diff line
@@ -28,6 +28,7 @@ enum ev_index {
	INST_IDX,
	CM_IDX,
	CYC_IDX,
	STALL_CYC_IDX,
	NUM_EVENTS
};
#define INST_EV		0x08
@@ -92,12 +93,19 @@ static void read_perf_counters(int cpu, struct cpu_grp_info *cpu_grp)
{
	struct cpu_pmu_stats *cpustats = to_cpustats(cpu_grp, cpu);
	struct dev_stats *devstats = to_devstats(cpu_grp, cpu);
	unsigned long cyc_cnt;
	unsigned long cyc_cnt, stall_cnt;

	devstats->inst_count = read_event(&cpustats->events[INST_IDX]);
	devstats->mem_count = read_event(&cpustats->events[CM_IDX]);
	cyc_cnt = read_event(&cpustats->events[CYC_IDX]);
	devstats->freq = compute_freq(cpustats, cyc_cnt);
	if (cpustats->events[STALL_CYC_IDX].pevent) {
		stall_cnt = read_event(&cpustats->events[STALL_CYC_IDX]);
		stall_cnt = min(stall_cnt, cyc_cnt);
		devstats->stall_pct = mult_frac(100, stall_cnt, cyc_cnt);
	} else {
		devstats->stall_pct = 100;
	}
}

static unsigned long get_cnt(struct memlat_hwmon *hw)
@@ -117,7 +125,10 @@ static void delete_events(struct cpu_pmu_stats *cpustats)

	for (i = 0; i < ARRAY_SIZE(cpustats->events); i++) {
		cpustats->events[i].prev_count = 0;
		if (cpustats->events[i].pevent) {
			perf_event_release_kernel(cpustats->events[i].pevent);
			cpustats->events[i].pevent = NULL;
		}
	}
}

@@ -135,6 +146,7 @@ static void stop_hwmon(struct memlat_hwmon *hw)
		devstats->inst_count = 0;
		devstats->mem_count = 0;
		devstats->freq = 0;
		devstats->stall_pct = 0;
	}
}

@@ -158,6 +170,7 @@ static int set_events(struct cpu_grp_info *cpu_grp, int cpu)
	struct perf_event *pevent;
	struct perf_event_attr *attr;
	int err, i;
	unsigned int event_id;
	struct cpu_pmu_stats *cpustats = to_cpustats(cpu_grp, cpu);

	/* Allocate an attribute for event initialization */
@@ -166,7 +179,11 @@ static int set_events(struct cpu_grp_info *cpu_grp, int cpu)
		return -ENOMEM;

	for (i = 0; i < ARRAY_SIZE(cpustats->events); i++) {
		attr->config = cpu_grp->event_ids[i];
		event_id = cpu_grp->event_ids[i];
		if (!event_id)
			continue;

		attr->config = event_id;
		pevent = perf_event_create_kernel_counter(attr, cpu, NULL,
							  NULL, NULL);
		if (IS_ERR(pevent))
@@ -282,6 +299,13 @@ static int arm_memlat_mon_driver_probe(struct platform_device *pdev)
	}
	cpu_grp->event_ids[INST_IDX] = event_id;

	ret = of_property_read_u32(dev->of_node, "qcom,stall-cycle-ev",
				   &event_id);
	if (ret)
		dev_dbg(dev, "Stall cycle event not specified. Event ignored.\n");
	else
		cpu_grp->event_ids[STALL_CYC_IDX] = event_id;

	for_each_cpu(cpu, &cpu_grp->cpus)
		to_devstats(cpu_grp, cpu)->id = cpu;

+8 −1
Original line number Diff line number Diff line
@@ -27,6 +27,7 @@

struct memlat_node {
	unsigned int		ratio_ceil;
	unsigned int		stall_floor;
	bool			mon_started;
	bool			already_zero;
	struct list_head	list;
@@ -234,9 +235,11 @@ static int devfreq_memlat_get_freq(struct devfreq *df,
					hw->core_stats[i].id,
					hw->core_stats[i].inst_count,
					hw->core_stats[i].mem_count,
					hw->core_stats[i].freq, ratio);
					hw->core_stats[i].freq,
					hw->core_stats[i].stall_pct, ratio);

		if (ratio <= node->ratio_ceil
		    && hw->core_stats[i].stall_pct >= node->stall_floor
		    && hw->core_stats[i].freq > max_freq) {
			lat_dev = i;
			max_freq = hw->core_stats[i].freq;
@@ -264,9 +267,13 @@ static int devfreq_memlat_get_freq(struct devfreq *df,
show_attr(ratio_ceil);
store_attr(ratio_ceil, 1U, 20000U);
static DEVICE_ATTR_RW(ratio_ceil);
show_attr(stall_floor);
store_attr(stall_floor, 0U, 100U);
static DEVICE_ATTR_RW(stall_floor);

static struct attribute *dev_attr[] = {
	&dev_attr_ratio_ceil.attr,
	&dev_attr_stall_floor.attr,
	&dev_attr_freq_map.attr,
	NULL,
};
+1 −0
Original line number Diff line number Diff line
@@ -21,6 +21,7 @@ struct dev_stats {
	unsigned long	inst_count;
	unsigned long	mem_count;
	unsigned long	freq;
	unsigned long	stall_pct;
};

struct core_dev_map {
+7 −3
Original line number Diff line number Diff line
@@ -631,9 +631,10 @@ TRACE_EVENT(cache_hwmon_update,
TRACE_EVENT(memlat_dev_meas,

	TP_PROTO(const char *name, unsigned int dev_id, unsigned long inst,
		 unsigned long mem, unsigned long freq, unsigned int ratio),
		 unsigned long mem, unsigned long freq, unsigned int stall,
		 unsigned int ratio),

	TP_ARGS(name, dev_id, inst, mem, freq, ratio),
	TP_ARGS(name, dev_id, inst, mem, freq, stall, ratio),

	TP_STRUCT__entry(
		__string(name, name)
@@ -641,6 +642,7 @@ TRACE_EVENT(memlat_dev_meas,
		__field(unsigned long, inst)
		__field(unsigned long, mem)
		__field(unsigned long, freq)
		__field(unsigned int, stall)
		__field(unsigned int, ratio)
	),

@@ -650,15 +652,17 @@ TRACE_EVENT(memlat_dev_meas,
		__entry->inst = inst;
		__entry->mem = mem;
		__entry->freq = freq;
		__entry->stall = stall;
		__entry->ratio = ratio;
	),

	TP_printk("dev: %s, id=%u, inst=%lu, mem=%lu, freq=%lu, ratio=%u",
	TP_printk("dev: %s, id=%u, inst=%lu, mem=%lu, freq=%lu, stall=%u, ratio=%u",
		__get_str(name),
		__entry->dev_id,
		__entry->inst,
		__entry->mem,
		__entry->freq,
		__entry->stall,
		__entry->ratio)
);