Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit eccbcb11 authored by Rama Aparna Mallavarapu
Browse files

PM / devfreq: memlat: Look for min stall% in addition to ratio criteria



Some workloads doing memory access might appear memory latency bound even
though they might not actually be memory latency bound.

This error can happen when the core that's running the workload is very
parallelized or can do out of order executions, etc so not all memory
accesses would actually stall the core.

This can also happen when the memory access monitoring capabilities
aren't ideal and end up counting more kinds of memory accesses than what
would be ideal. In this case, the IPM ratio can be lower than what it would
be if we had ideal monitoring capabilities.

To account for these errors, if the core has stall cycle counting
capabilities, check for a minimum stall% before the workload is considered
memory latency bound. This would help reduce the inaccuracies, but is not a
replacement for IPM ratio scheme because the stall% method doesn't allow us
to detect which level of memory the workload is latency bound on, but the
IPM ratio does (based on which memory accesses we use for calculating the
ratio).

Change-Id: I4363d7848584e5562f6683b5ad6b0f99017ec71b
Signed-off-by: Saravana Kannan <skannan@codeaurora.org>
Signed-off-by: Rama Aparna Mallavarapu <aparnam@codeaurora.org>
parent ac423f36
Loading
Loading
Loading
Loading
+27 −3
Original line number Diff line number Diff line
@@ -28,6 +28,7 @@ enum ev_index {
	INST_IDX,
	CM_IDX,
	CYC_IDX,
	STALL_CYC_IDX,
	NUM_EVENTS
};
#define INST_EV		0x08
@@ -92,12 +93,19 @@ static void read_perf_counters(int cpu, struct cpu_grp_info *cpu_grp)
{
	/*
	 * Read this CPU's perf events and publish the results in its
	 * dev_stats entry: instruction count, memory-access count, the
	 * frequency derived from the cycle count, and the stall percentage.
	 */
	struct cpu_pmu_stats *cpustats = to_cpustats(cpu_grp, cpu);
	struct dev_stats *devstats = to_devstats(cpu_grp, cpu);
	unsigned long cyc_cnt;
	unsigned long cyc_cnt, stall_cnt;

	devstats->inst_count = read_event(&cpustats->events[INST_IDX]);
	devstats->mem_count = read_event(&cpustats->events[CM_IDX]);
	cyc_cnt = read_event(&cpustats->events[CYC_IDX]);
	devstats->freq = compute_freq(cpustats, cyc_cnt);
	/*
	 * The stall-cycle event is optional. Presumably pevent stays NULL
	 * when no stall-cycle event id was configured (set_events() skips
	 * zero event ids) -- confirm against the rest of set_events().
	 */
	if (cpustats->events[STALL_CYC_IDX].pevent) {
		stall_cnt = read_event(&cpustats->events[STALL_CYC_IDX]);
		/* Clamp to the cycle count so the percentage cannot exceed 100. */
		stall_cnt = min(stall_cnt, cyc_cnt);
		/*
		 * NOTE(review): mult_frac() divides by its last argument. If
		 * read_event() can return 0 cycles for this window, this is a
		 * division by zero -- confirm whether cyc_cnt needs a guard.
		 */
		devstats->stall_pct = mult_frac(100, stall_cnt, cyc_cnt);
	} else {
		/*
		 * No stall counter available: report 100, which satisfies the
		 * governor's "stall_pct >= stall_floor" test for any
		 * stall_floor up to 100, leaving the ratio check in charge.
		 */
		devstats->stall_pct = 100;
	}
}

static unsigned long get_cnt(struct memlat_hwmon *hw)
@@ -117,7 +125,10 @@ static void delete_events(struct cpu_pmu_stats *cpustats)

	for (i = 0; i < ARRAY_SIZE(cpustats->events); i++) {
		cpustats->events[i].prev_count = 0;
		if (cpustats->events[i].pevent) {
			perf_event_release_kernel(cpustats->events[i].pevent);
			cpustats->events[i].pevent = NULL;
		}
	}
}

@@ -135,6 +146,7 @@ static void stop_hwmon(struct memlat_hwmon *hw)
		devstats->inst_count = 0;
		devstats->mem_count = 0;
		devstats->freq = 0;
		devstats->stall_pct = 0;
	}
}

@@ -159,6 +171,7 @@ static int set_events(struct cpu_grp_info *cpu_grp, int cpu)
	struct perf_event *pevent;
	struct perf_event_attr *attr;
	int err, i;
	unsigned int event_id;
	struct cpu_pmu_stats *cpustats = to_cpustats(cpu_grp, cpu);

	/* Allocate an attribute for event initialization */
@@ -167,7 +180,11 @@ static int set_events(struct cpu_grp_info *cpu_grp, int cpu)
		return -ENOMEM;

	for (i = 0; i < ARRAY_SIZE(cpustats->events); i++) {
		attr->config = cpu_grp->event_ids[i];
		event_id = cpu_grp->event_ids[i];
		if (!event_id)
			continue;

		attr->config = event_id;
		pevent = perf_event_create_kernel_counter(attr, cpu, NULL,
							  NULL, NULL);
		if (IS_ERR(pevent))
@@ -282,6 +299,13 @@ static int arm_memlat_mon_driver_probe(struct platform_device *pdev)
	}
	cpu_grp->event_ids[INST_IDX] = event_id;

	ret = of_property_read_u32(dev->of_node, "qcom,stall-cycle-ev",
				   &event_id);
	if (ret)
		dev_dbg(dev, "Stall cycle event not specified. Event ignored.\n");
	else
		cpu_grp->event_ids[STALL_CYC_IDX] = event_id;

	for_each_cpu(cpu, &cpu_grp->cpus)
		to_devstats(cpu_grp, cpu)->id = cpu;

+6 −1
Original line number Diff line number Diff line
@@ -27,6 +27,7 @@

struct memlat_node {
	unsigned int ratio_ceil;
	unsigned int stall_floor;
	bool mon_started;
	bool already_zero;
	struct list_head list;
@@ -239,9 +240,11 @@ static int devfreq_memlat_get_freq(struct devfreq *df,
					hw->core_stats[i].id,
					hw->core_stats[i].inst_count,
					hw->core_stats[i].mem_count,
					hw->core_stats[i].freq, ratio);
					hw->core_stats[i].freq,
					hw->core_stats[i].stall_pct, ratio);

		if (ratio <= node->ratio_ceil
		    && hw->core_stats[i].stall_pct >= node->stall_floor
		    && hw->core_stats[i].freq > max_freq) {
			lat_dev = i;
			max_freq = hw->core_stats[i].freq;
@@ -267,9 +270,11 @@ static int devfreq_memlat_get_freq(struct devfreq *df,
}

/*
 * Attribute definitions for ratio_ceil and stall_floor.
 * NOTE(review): gov_attr() is not visible here; the two numeric arguments
 * are presumably the minimum and maximum accepted values -- confirm against
 * the macro definition.
 */
gov_attr(ratio_ceil, 1U, 10000U);
gov_attr(stall_floor, 0U, 100U);

/*
 * NULL-terminated list of attribute pointers: ratio_ceil, stall_floor and
 * freq_map. dev_attr_freq_map is defined outside the lines shown here.
 */
static struct attribute *dev_attr[] = {
	&dev_attr_ratio_ceil.attr,
	&dev_attr_stall_floor.attr,
	&dev_attr_freq_map.attr,
	NULL,
};
+1 −0
Original line number Diff line number Diff line
@@ -21,6 +21,7 @@ struct dev_stats {
	unsigned long inst_count;
	unsigned long mem_count;
	unsigned long freq;
	unsigned long stall_pct;
};

struct core_dev_map {
+7 −3
Original line number Diff line number Diff line
@@ -730,9 +730,10 @@ TRACE_EVENT(cache_hwmon_update,
TRACE_EVENT(memlat_dev_meas,

	TP_PROTO(const char *name, unsigned int dev_id, unsigned long inst,
		 unsigned long mem, unsigned long freq, unsigned int ratio),
		 unsigned long mem, unsigned long freq, unsigned int stall,
		 unsigned int ratio),

	TP_ARGS(name, dev_id, inst, mem, freq, ratio),
	TP_ARGS(name, dev_id, inst, mem, freq, stall, ratio),

	TP_STRUCT__entry(
		__string(name, name)
@@ -740,6 +741,7 @@ TRACE_EVENT(memlat_dev_meas,
		__field(unsigned long, inst)
		__field(unsigned long, mem)
		__field(unsigned long, freq)
		__field(unsigned int, stall)
		__field(unsigned int, ratio)
	),

@@ -749,15 +751,17 @@ TRACE_EVENT(memlat_dev_meas,
		__entry->inst = inst;
		__entry->mem = mem;
		__entry->freq = freq;
		__entry->stall = stall;
		__entry->ratio = ratio;
	),

	TP_printk("dev: %s, id=%u, inst=%lu, mem=%lu, freq=%lu, ratio=%u",
	TP_printk("dev: %s, id=%u, inst=%lu, mem=%lu, freq=%lu, stall=%u, ratio=%u",
		__get_str(name),
		__entry->dev_id,
		__entry->inst,
		__entry->mem,
		__entry->freq,
		__entry->stall,
		__entry->ratio)
);