Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 4088fdc4 authored by Santosh Mardi, committed by Gerrit - the friendly Code Review server
Browse files

PM / devfreq: memlat: Add writeback percentage as a lat condition



Certain L3 workloads are latency-sensitive to cache snoop traffic, and
this traffic is not directly quantified in the existing memlat scheme.

Use L2 writeback percentage as a metric to identify snoop traffic to
improve memlat.

Change-Id: I9d43375d96de5a199c6a87c55e5c1079549b23ce
Signed-off-by: Santosh Mardi <gsantosh@codeaurora.org>
parent 725421da
Loading
Loading
Loading
Loading
+76 −0
Original line number Diff line number Diff line
@@ -65,8 +65,20 @@ struct cpu_data {
 *				defaults to using all of @cpu_grp's CPUs.
 * @miss_ev_id:			The event code corresponding to the @miss_ev
 *				perf event. Will be 0 for compute.
 * @access_ev_id:		The event code corresponding to the @access_ev
 *				perf event. Optional - only needed for writeback
 *				percent.
 * @wb_ev_id:			The event code corresponding to the @wb_ev perf
 *				event. Optional - only needed for writeback
 *				percent.
 * @miss_ev:			The cache miss perf event exclusive to this
 *				mon. Will be NULL for compute.
 * @access_ev:			The cache access perf event exclusive to this
 *				mon. Optional - only needed for writeback
 *				percent.
 * @wb_ev:			The cache writeback perf event exclusive to this
 *				mon. Optional - only needed for writeback
 *				percent.
 * @requested_update_ms:	The mon's desired polling rate. The lowest
 *				@requested_update_ms of all mons determines
 *				@cpu_grp's update_ms.
@@ -78,8 +90,12 @@ struct memlat_mon {
	bool			is_active;
	cpumask_t		cpus;
	unsigned int		miss_ev_id;
	unsigned int		access_ev_id;
	unsigned int		wb_ev_id;
	unsigned int		requested_update_ms;
	struct event_data	*miss_ev;
	struct event_data	*access_ev;
	struct event_data	*wb_ev;
	struct memlat_hwmon	hw;

	struct memlat_cpu_grp	*cpu_grp;
@@ -191,6 +207,11 @@ static void update_counts(struct memlat_cpu_grp *cpu_grp)
			unsigned int mon_idx =
				cpu - cpumask_first(&mon->cpus);
			read_event(&mon->miss_ev[mon_idx]);

			if (mon->wb_ev_id && mon->access_ev_id) {
				read_event(&mon->wb_ev[mon_idx]);
				read_event(&mon->access_ev[mon_idx]);
			}
		}
	}
}
@@ -219,6 +240,13 @@ static unsigned long get_cnt(struct memlat_hwmon *hw)
			devstats->inst_count = 0;
			devstats->mem_count = 1;
		}

		if (mon->access_ev_id && mon->wb_ev_id)
			devstats->wb_pct =
				mult_frac(100, mon->wb_ev[mon_idx].last_delta,
					  mon->access_ev[mon_idx].last_delta);
		else
			devstats->wb_pct = 0;
	}

	return 0;
@@ -365,6 +393,18 @@ static int start_hwmon(struct memlat_hwmon *hw)
					mon->miss_ev_id, attr);
			if (ret)
				goto unlock_out;

			if (mon->access_ev_id && mon->wb_ev_id) {
				ret = set_event(&mon->access_ev[idx], cpu,
						mon->access_ev_id, attr);
				if (ret)
					goto unlock_out;

				ret = set_event(&mon->wb_ev[idx], cpu,
						mon->wb_ev_id, attr);
				if (ret)
					goto unlock_out;
			}
		}
	}

@@ -401,6 +441,7 @@ static void stop_hwmon(struct memlat_hwmon *hw)
		devstats->mem_count = 0;
		devstats->freq = 0;
		devstats->stall_pct = 0;
		devstats->wb_pct = 0;
	}

	if (!cpu_grp->num_active_mons) {
@@ -648,6 +689,8 @@ static int memlat_mon_probe(struct platform_device *pdev, bool is_compute)
	 */
	if (is_compute) {
		mon->miss_ev_id = 0;
		mon->access_ev_id = 0;
		mon->wb_ev_id = 0;
		ret = register_compute(dev, hw);
	} else {
		mon->miss_ev =
@@ -668,6 +711,39 @@ static int memlat_mon_probe(struct platform_device *pdev, bool is_compute)
		}
		mon->miss_ev_id = event_id;

		ret = of_property_read_u32(dev->of_node, "qcom,access-ev",
					   &event_id);
		if (ret)
			dev_dbg(dev, "Access event not specified. Skipping.\n");
		else
			mon->access_ev_id = event_id;

		ret = of_property_read_u32(dev->of_node, "qcom,wb-ev",
					   &event_id);
		if (ret)
			dev_dbg(dev, "WB event not specified. Skipping.\n");
		else
			mon->wb_ev_id = event_id;

		if (mon->wb_ev_id && mon->access_ev_id) {
			mon->access_ev =
				devm_kzalloc(dev, num_cpus *
					     sizeof(*mon->access_ev),
					     GFP_KERNEL);
			if (!mon->access_ev) {
				ret = -ENOMEM;
				goto unlock_out;
			}

			mon->wb_ev =
				devm_kzalloc(dev, num_cpus *
					     sizeof(*mon->wb_ev), GFP_KERNEL);
			if (!mon->wb_ev) {
				ret = -ENOMEM;
				goto unlock_out;
			}
		}

		ret = register_memlat(dev, hw);
	}

+16 −5
Original line number Diff line number Diff line
@@ -29,6 +29,8 @@
struct memlat_node {
	unsigned int ratio_ceil;
	unsigned int stall_floor;
	unsigned int wb_pct_thres;
	unsigned int wb_filter_ratio;
	bool mon_started;
	bool already_zero;
	struct list_head list;
@@ -292,11 +294,14 @@ static int devfreq_memlat_get_freq(struct devfreq *df,
					hw->core_stats[i].inst_count,
					hw->core_stats[i].mem_count,
					hw->core_stats[i].freq,
					hw->core_stats[i].stall_pct, ratio);

		if (ratio <= node->ratio_ceil
		    && hw->core_stats[i].stall_pct >= node->stall_floor
		    && hw->core_stats[i].freq > max_freq) {
					hw->core_stats[i].stall_pct,
					hw->core_stats[i].wb_pct, ratio);

		if (((ratio <= node->ratio_ceil
		      && hw->core_stats[i].stall_pct >= node->stall_floor) ||
		      (hw->core_stats[i].wb_pct >= node->wb_pct_thres
		      && ratio <= node->wb_filter_ratio))
		      && (hw->core_stats[i].freq > max_freq)) {
			lat_dev = i;
			max_freq = hw->core_stats[i].freq;
		}
@@ -322,10 +327,14 @@ static int devfreq_memlat_get_freq(struct devfreq *df,

gov_attr(ratio_ceil, 1U, 20000U);
gov_attr(stall_floor, 0U, 100U);
gov_attr(wb_pct_thres, 0U, 100U);
gov_attr(wb_filter_ratio, 0U, 50000U);

static struct attribute *memlat_dev_attr[] = {
	&dev_attr_ratio_ceil.attr,
	&dev_attr_stall_floor.attr,
	&dev_attr_wb_pct_thres.attr,
	&dev_attr_wb_filter_ratio.attr,
	&dev_attr_freq_map.attr,
	NULL,
};
@@ -488,6 +497,8 @@ static struct memlat_node *register_common(struct device *dev,
		return ERR_PTR(-ENOMEM);

	node->ratio_ceil = 10;
	node->wb_pct_thres = 100;
	node->wb_filter_ratio = 25000;
	node->hw = hw;

	if (hw->get_child_of_node) {
+4 −0
Original line number Diff line number Diff line
@@ -15,6 +15,9 @@
 * @mem_count:			Number of memory accesses made.
 * @freq:			Effective frequency of the device in the
 *				last interval.
 * @wb_pct:			The ratio of writebacks to accesses. Used as an
 *				indirect way to identify memory latency due to
 *				snoop activity.
 */
struct dev_stats {
	int id;
@@ -22,6 +25,7 @@ struct dev_stats {
	unsigned long mem_count;
	unsigned long freq;
	unsigned long stall_pct;
	unsigned long wb_pct;
};

struct core_dev_map {
+6 −4
Original line number Diff line number Diff line
@@ -730,12 +730,11 @@ TRACE_EVENT(cache_hwmon_update,
);

TRACE_EVENT(memlat_dev_meas,

	TP_PROTO(const char *name, unsigned int dev_id, unsigned long inst,
		 unsigned long mem, unsigned long freq, unsigned int stall,
		 unsigned int ratio),
		 unsigned int wb, unsigned int ratio),

	TP_ARGS(name, dev_id, inst, mem, freq, stall, ratio),
	TP_ARGS(name, dev_id, inst, mem, freq, stall, wb, ratio),

	TP_STRUCT__entry(
		__string(name, name)
@@ -744,6 +743,7 @@ TRACE_EVENT(memlat_dev_meas,
		__field(unsigned long, mem)
		__field(unsigned long, freq)
		__field(unsigned int, stall)
		__field(unsigned int, wb)
		__field(unsigned int, ratio)
	),

@@ -754,16 +754,18 @@ TRACE_EVENT(memlat_dev_meas,
		__entry->mem = mem;
		__entry->freq = freq;
		__entry->stall = stall;
		__entry->wb = wb;
		__entry->ratio = ratio;
	),

	TP_printk("dev: %s, id=%u, inst=%lu, mem=%lu, freq=%lu, stall=%u, ratio=%u",
	TP_printk("dev: %s, id=%u, inst=%lu, mem=%lu, freq=%lu, stall=%u, wb=%u, ratio=%u",
		__get_str(name),
		__entry->dev_id,
		__entry->inst,
		__entry->mem,
		__entry->freq,
		__entry->stall,
		__entry->wb,
		__entry->ratio)
);