Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 69034c61, authored by Linux Build Service Account, committed by Gerrit - the friendly Code Review server
Browse files

Merge "soc: qcom: msm_perf: Add detection for heavy IO workloads"

parents 16f82c33 795eb5f5
Loading
Loading
Loading
Loading
+368 −1
Original line number Diff line number Diff line
@@ -18,8 +18,12 @@
#include <linux/cpumask.h>
#include <linux/cpufreq.h>
#include <linux/slab.h>

#include <linux/sched.h>
#include <linux/tick.h>
#include <trace/events/power.h>
#include <linux/sysfs.h>
#include <linux/module.h>
#include <linux/kthread.h>

static struct mutex managed_cpus_lock;

@@ -31,6 +35,13 @@ struct cluster {
	int max_cpu_request;
	/* To track CPUs that the module decides to offline */
	cpumask_var_t offlined_cpus;

	/* stats for load detection */
	u64 last_io_check_ts;
	unsigned int iowait_cycle_cnt;
	spinlock_t iowait_lock;
	unsigned int cur_io_busy;
	bool io_change;
};
static struct cluster **managed_clusters;
static bool clusters_inited;
@@ -51,6 +62,31 @@ static int rm_high_pwr_cost_cpus(struct cluster *cl);

static DEFINE_PER_CPU(unsigned int, cpu_power_cost);

/*
 * Per-CPU sampling state used by the governor-info notifier to derive the
 * share of time a CPU spent in iowait between two consecutive samples.
 */
struct load_stats {
	/* Wall-clock timestamp recorded at the previous sample */
	u64 last_wallclock;
	/* IO wait related */
	/* Cumulative iowait time read at the previous sample */
	u64 last_iowait;
	/* iowait share (0..100) of the most recent sampling interval */
	unsigned int last_iopercent;
};
static DEFINE_PER_CPU(struct load_stats, cpu_load_stats);
/*
 * Slack added to the sampling rate when deciding whether a CPU's last
 * iowait sample is recent enough to be considered.
 */
#define LAST_UPDATE_TOL		USEC_PER_MSEC

/* Bitmask to keep track of the workloads being detected */
static unsigned int workload_detect;
/* Bit in workload_detect that enables heavy-IO detection */
#define IO_DETECT	1

/* IOwait related tunables */
/*
 * Number of consecutive above-ceiling checks needed to flag a cluster as
 * IO busy; also the value the exit countdown is reloaded with.
 */
static unsigned int io_enter_cycles = 4;
/* iowait percentage that must be exceeded to count towards entering busy */
static u64 iowait_ceiling_pct = 25;
/* iowait percentage that must be undercut to count towards leaving busy */
static u64 iowait_floor_pct = 8;
/* Slack subtracted from the sampling rate when rate-limiting cluster checks */
#define LAST_IO_CHECK_TOL	(3 * USEC_PER_MSEC)

/* OR of every cluster's cur_io_busy flag, as last reported to userspace */
static unsigned int aggr_iobusy;

/* Kernel thread running notify_userspace() */
static struct task_struct *notify_thread;

/**************************sysfs start********************************/

static int set_num_clusters(const char *buf, const struct kernel_param *kp)
{
	unsigned int val;
@@ -381,6 +417,144 @@ static const struct kernel_param_ops param_ops_cpu_max_freq = {
};
module_param_cb(cpu_max_freq, &param_ops_cpu_max_freq, NULL, 0644);

/*
 * Parameter setter for io_enter_cycles.
 *
 * Parses one unsigned integer from @buf and stores it as the new cycle
 * threshold. Returns 0 on success or -EINVAL if no number could be parsed.
 */
static int set_io_enter_cycles(const char *buf, const struct kernel_param *kp)
{
	unsigned int cycles;
	int parsed;

	parsed = sscanf(buf, "%u\n", &cycles);
	if (parsed != 1)
		return -EINVAL;

	io_enter_cycles = cycles;
	return 0;
}

/*
 * Parameter getter for io_enter_cycles: formats the current value into @buf
 * and returns the length reported by snprintf().
 */
static int get_io_enter_cycles(char *buf, const struct kernel_param *kp)
{
	int len;

	len = snprintf(buf, PAGE_SIZE, "%u", io_enter_cycles);

	return len;
}

/* Set/get hooks that expose io_enter_cycles as a parameter (mode 0644) */
static const struct kernel_param_ops param_ops_io_enter_cycles = {
	.set = set_io_enter_cycles,
	.get = get_io_enter_cycles,
};
device_param_cb(io_enter_cycles, &param_ops_io_enter_cycles, NULL, 0644);

/*
 * Parameter setter for iowait_floor_pct.
 *
 * Accepts one unsigned 64-bit value from @buf. The value is rejected with
 * -EINVAL when it cannot be parsed or when it is above the current
 * iowait_ceiling_pct; otherwise it becomes the new floor and 0 is returned.
 */
static int set_iowait_floor_pct(const char *buf, const struct kernel_param *kp)
{
	u64 floor_pct;

	if (sscanf(buf, "%llu\n", &floor_pct) != 1 ||
	    floor_pct > iowait_ceiling_pct)
		return -EINVAL;

	iowait_floor_pct = floor_pct;
	return 0;
}

/*
 * Parameter getter for iowait_floor_pct: formats the current value into @buf
 * and returns the length reported by snprintf().
 */
static int get_iowait_floor_pct(char *buf, const struct kernel_param *kp)
{
	int len;

	len = snprintf(buf, PAGE_SIZE, "%llu", iowait_floor_pct);

	return len;
}

/* Set/get hooks that expose iowait_floor_pct as a parameter (mode 0644) */
static const struct kernel_param_ops param_ops_iowait_floor_pct = {
	.set = set_iowait_floor_pct,
	.get = get_iowait_floor_pct,
};
device_param_cb(iowait_floor_pct, &param_ops_iowait_floor_pct, NULL, 0644);

/*
 * Parameter setter for iowait_ceiling_pct.
 *
 * Accepts one unsigned 64-bit value from @buf. The value is rejected with
 * -EINVAL when it cannot be parsed or when it is below the current
 * iowait_floor_pct; otherwise it becomes the new ceiling and 0 is returned.
 */
static int set_iowait_ceiling_pct(const char *buf,
						const struct kernel_param *kp)
{
	u64 ceiling_pct;

	if (sscanf(buf, "%llu\n", &ceiling_pct) != 1 ||
	    ceiling_pct < iowait_floor_pct)
		return -EINVAL;

	iowait_ceiling_pct = ceiling_pct;
	return 0;
}

/*
 * Parameter getter for iowait_ceiling_pct: formats the current value into
 * @buf and returns the length reported by snprintf().
 */
static int get_iowait_ceiling_pct(char *buf, const struct kernel_param *kp)
{
	int len;

	len = snprintf(buf, PAGE_SIZE, "%llu", iowait_ceiling_pct);

	return len;
}

/* Set/get hooks that expose iowait_ceiling_pct as a parameter (mode 0644) */
static const struct kernel_param_ops param_ops_iowait_ceiling_pct = {
	.set = set_iowait_ceiling_pct,
	.get = get_iowait_ceiling_pct,
};
device_param_cb(iowait_ceiling_pct, &param_ops_iowait_ceiling_pct, NULL, 0644);

/*
 * Parameter setter for the workload_detect bitmask.
 *
 * Fails with -EINVAL until the clusters have been initialised or when @buf
 * does not contain an unsigned integer. Writing the value that is already
 * set is a no-op. When the new mask no longer contains IO_DETECT, every
 * cluster's IO-busy state is reset under its iowait_lock and flagged as
 * changed, so that the notifier thread re-evaluates the aggregate state.
 *
 * Returns 0 on success.
 */
static int set_workload_detect(const char *buf, const struct kernel_param *kp)
{
	unsigned int val, i;
	struct cluster *i_cl;
	unsigned long flags;

	if (!clusters_inited)
		return -EINVAL;

	if (sscanf(buf, "%u\n", &val) != 1)
		return -EINVAL;

	if (val == workload_detect)
		return 0;

	workload_detect = val;

	if (!(workload_detect & IO_DETECT)) {
		for (i = 0; i < num_clusters; i++) {
			i_cl = managed_clusters[i];
			spin_lock_irqsave(&i_cl->iowait_lock, flags);
			i_cl->iowait_cycle_cnt = 0;
			i_cl->cur_io_busy = 0;
			i_cl->io_change = true;
			spin_unlock_irqrestore(&i_cl->iowait_lock, flags);
		}
	}

	/*
	 * The kthread_run() result is stored without being checked, so
	 * notify_thread may hold an ERR_PTR; never hand that to
	 * wake_up_process().
	 */
	if (!IS_ERR_OR_NULL(notify_thread))
		wake_up_process(notify_thread);
	return 0;
}

/*
 * Parameter getter for workload_detect: formats the current bitmask into
 * @buf and returns the length reported by snprintf().
 */
static int get_workload_detect(char *buf, const struct kernel_param *kp)
{
	int len;

	len = snprintf(buf, PAGE_SIZE, "%u", workload_detect);

	return len;
}

/* Set/get hooks that expose workload_detect as a parameter (mode 0644) */
static const struct kernel_param_ops param_ops_workload_detect = {
	.set = set_workload_detect,
	.get = get_workload_detect,
};
device_param_cb(workload_detect, &param_ops_workload_detect, NULL, 0644);

/* kobject under which the workload-mode attributes are published */
static struct kobject *mode_kobj;

/*
 * sysfs show handler for "aggr_iobusy": prints the aggregate IO-busy value
 * followed by a newline and returns the length reported by snprintf().
 */
static ssize_t show_aggr_iobusy(struct kobject *kobj,
					struct kobj_attribute *attr, char *buf)
{
	ssize_t len;

	len = snprintf(buf, PAGE_SIZE, "%u\n", aggr_iobusy);

	return len;
}
/* Read-only (0444) attribute backed by show_aggr_iobusy(); no store hook */
static struct kobj_attribute aggr_iobusy_attr =
__ATTR(aggr_iobusy, 0444, show_aggr_iobusy, NULL);

/* NULL-terminated list of the attributes in the group below */
static struct attribute *attrs[] = {
	&aggr_iobusy_attr.attr,
	NULL,
};

/* Attribute group holding the attributes listed in attrs[] */
static struct attribute_group attr_group = {
	.attrs = attrs,
};

/*******************************sysfs ends************************************/

static unsigned int num_online_managed(struct cpumask *mask)
{
	struct cpumask tmp_mask;
@@ -419,6 +593,174 @@ static struct notifier_block perf_cpufreq_nb = {
	.notifier_call = perf_adjust_notify,
};

/*
 * Consume the per-cluster io_change flags.
 *
 * Every cluster's flag is read and cleared while holding that cluster's
 * iowait_lock. Returns true if at least one cluster had its flag set.
 */
static bool check_notify_status(void)
{
	unsigned long irq_flags;
	bool pending = false;
	int idx;

	for (idx = 0; idx < num_clusters; idx++) {
		struct cluster *cluster = managed_clusters[idx];

		spin_lock_irqsave(&cluster->iowait_lock, irq_flags);
		pending = pending || cluster->io_change;
		cluster->io_change = false;
		spin_unlock_irqrestore(&cluster->iowait_lock, irq_flags);
	}

	return pending;
}

/*
 * Thread function of notify_thread.
 *
 * Sleeps until some cluster has reported an IO-busy change (or the thread
 * is woken for another reason), then recomputes the aggregate IO-busy state
 * and pokes sysfs pollers of "aggr_iobusy" if the aggregate changed.
 *
 * @data is unused. Returns 0 once kthread_should_stop() is observed after
 * a wake-up.
 */
static int notify_userspace(void *data)
{
	unsigned int i, io;

	while (1) {
		/*
		 * The task state is set before the pending-change check;
		 * presumably so that a wake-up arriving between the check and
		 * schedule() is not lost.
		 */
		set_current_state(TASK_INTERRUPTIBLE);
		if (!check_notify_status()) {
			schedule();

			/* The stop request is only examined after a sleep */
			if (kthread_should_stop())
				break;
		}
		set_current_state(TASK_RUNNING);

		/*
		 * Aggregate is the OR of all clusters' busy flags; they are
		 * read here without taking iowait_lock.
		 */
		io = 0;
		for (i = 0; i < num_clusters; i++)
			io |= managed_clusters[i]->cur_io_busy;

		/* Only notify userspace when the aggregate actually changed */
		if (io != aggr_iobusy) {
			aggr_iobusy = io;
			sysfs_notify(mode_kobj, NULL, "aggr_iobusy");
			pr_debug("msm_perf: Notifying IO: %u\n", aggr_iobusy);
		}
	}

	return 0;
}

/*
 * Re-evaluate the IO-busy state of one cluster.
 *
 * @cl:   cluster to evaluate
 * @rate: governor sampling rate, used to rate-limit the evaluation and to
 *        ignore CPUs whose last sample is too old
 * @now:  timestamp of the current sample
 *
 * The highest recent per-CPU iowait percentage in the cluster drives a small
 * hysteresis state machine: io_enter_cycles consecutive readings above
 * iowait_ceiling_pct switch the cluster to busy, and the same number of
 * readings below iowait_floor_pct (counted down, reloaded by any reading at
 * or above the floor) switch it back. A state change sets cl->io_change and
 * wakes the notifier thread.
 */
static void check_cluster_iowait(struct cluster *cl, unsigned int rate, u64 now)
{
	struct load_stats *pcpu_st;
	unsigned int i;
	unsigned long flags;
	unsigned int temp_iobusy;
	u64 max_iowait = 0;
	u64 min_interval;
	bool notify;

	/*
	 * Clamp at zero: a plain "rate - LAST_IO_CHECK_TOL" goes negative for
	 * small sampling rates and, once converted to u64, turns into a huge
	 * threshold that suppresses the check.
	 */
	min_interval = (rate > LAST_IO_CHECK_TOL) ?
					rate - LAST_IO_CHECK_TOL : 0;

	spin_lock_irqsave(&cl->iowait_lock, flags);

	if (((now - cl->last_io_check_ts) < min_interval) ||
					!(workload_detect & IO_DETECT)) {
		spin_unlock_irqrestore(&cl->iowait_lock, flags);
		return;
	}

	temp_iobusy = cl->cur_io_busy;
	for_each_cpu(i, cl->cpus) {
		pcpu_st = &per_cpu(cpu_load_stats, i);
		/* Skip CPUs whose last sample is older than one period */
		if ((now - pcpu_st->last_wallclock) > (rate + LAST_UPDATE_TOL))
			continue;
		if (max_iowait < pcpu_st->last_iopercent)
			max_iowait = pcpu_st->last_iopercent;
	}

	if (!cl->cur_io_busy) {
		if (max_iowait > iowait_ceiling_pct) {
			cl->iowait_cycle_cnt++;
			if (cl->iowait_cycle_cnt >= io_enter_cycles)
				cl->cur_io_busy = 1;
		} else {
			cl->iowait_cycle_cnt = 0;
		}
	} else {
		if (max_iowait < iowait_floor_pct) {
			/*
			 * The counter can already be 0 here (io_enter_cycles
			 * may be set to 0); do not let it wrap around.
			 */
			if (cl->iowait_cycle_cnt)
				cl->iowait_cycle_cnt--;
			if (!cl->iowait_cycle_cnt)
				cl->cur_io_busy = 0;
		} else {
			cl->iowait_cycle_cnt = io_enter_cycles;
		}
	}
	cl->last_io_check_ts = now;
	trace_track_iowait(cpumask_first(cl->cpus), cl->iowait_cycle_cnt,
						cl->cur_io_busy, max_iowait);

	if (temp_iobusy != cl->cur_io_busy) {
		cl->io_change = true;
		pr_debug("msm_perf: IO changed to %u\n", cl->cur_io_busy);
	}

	/* Sample the flag while the lock still protects it */
	notify = cl->io_change;

	spin_unlock_irqrestore(&cl->iowait_lock, flags);

	/*
	 * notify_thread holds an unchecked kthread_run() result and may be
	 * an ERR_PTR, so validate it before waking.
	 */
	if (notify && !IS_ERR_OR_NULL(notify_thread))
		wake_up_process(notify_thread);
}

/*
 * Run the cluster-level iowait check for the cluster that contains @cpu.
 *
 * Only the first managed cluster whose CPU mask contains @cpu is evaluated;
 * nothing happens when @cpu belongs to no managed cluster. @timer_rate and
 * @now are passed through to check_cluster_iowait().
 */
static void check_cpu_io_stats(unsigned int cpu, unsigned int timer_rate,
									u64 now)
{
	unsigned int idx;

	for (idx = 0; idx < num_clusters; idx++) {
		struct cluster *owner = managed_clusters[idx];

		if (!cpumask_test_cpu(cpu, owner->cpus))
			continue;

		check_cluster_iowait(owner, timer_rate, now);
		return;
	}
}

static int perf_govinfo_notify(struct notifier_block *nb, unsigned long val,
								void *data)
{
	struct cpufreq_govinfo *gov_info = data;
	unsigned int cpu = gov_info->cpu;
	struct load_stats *cpu_st = &per_cpu(cpu_load_stats, cpu);
	u64 now, cur_iowait, time_diff, iowait_diff;

	if (!clusters_inited || !workload_detect)
		return NOTIFY_OK;

	cur_iowait = get_cpu_iowait_time_us(cpu, &now);
	if (cur_iowait >= cpu_st->last_iowait)
		iowait_diff = cur_iowait - cpu_st->last_iowait;
	else
		iowait_diff = 0;

	if (now > cpu_st->last_wallclock)
		time_diff = now - cpu_st->last_wallclock;
	else
		return NOTIFY_OK;

	if (iowait_diff <= time_diff) {
		iowait_diff *= 100;
		cpu_st->last_iopercent = div64_u64(iowait_diff, time_diff);
	} else {
		cpu_st->last_iopercent = 100;
	}

	cpu_st->last_wallclock = now;
	cpu_st->last_iowait = cur_iowait;

	/*
	 * Avoid deadlock in case governor notifier ran in the context
	 * of notify_work thread
	 */
	if (current == notify_thread)
		return NOTIFY_OK;

	check_cpu_io_stats(cpu, gov_info->sampling_rate_us, now);

	return NOTIFY_OK;
}
/* Notifier block that routes governor-info events to perf_govinfo_notify() */
static struct notifier_block perf_govinfo_nb = {
	.notifier_call = perf_govinfo_notify,
};

/*
 * Attempt to offline CPUs based on their power cost.
 * CPUs with higher power costs are offlined first.
@@ -647,6 +989,8 @@ static struct notifier_block __refdata msm_performance_cpu_notifier = {
static int init_cluster_control(void)
{
	unsigned int i;
	int ret;
	struct kobject *module_kobj;

	managed_clusters = kzalloc(num_clusters * sizeof(struct cluster *),
								GFP_KERNEL);
@@ -664,12 +1008,31 @@ static int init_cluster_control(void)
		}

		managed_clusters[i]->max_cpu_request = -1;
		spin_lock_init(&(managed_clusters[i]->iowait_lock));
	}

	INIT_DELAYED_WORK(&evaluate_hotplug_work, check_cluster_status);
	mutex_init(&managed_cpus_lock);

	module_kobj = kset_find_obj(module_kset, KBUILD_MODNAME);
	if (!module_kobj) {
		pr_err("msm_perf: Couldn't find module kobject\n");
		return -ENOENT;
	}
	mode_kobj = kobject_create_and_add("workload_modes", module_kobj);
	if (!mode_kobj) {
		pr_err("msm_perf: Failed to add mode_kobj\n");
		return -ENOMEM;
	}
	ret = sysfs_create_group(mode_kobj, &attr_group);
	if (ret) {
		pr_err("msm_perf: Failed to create sysfs\n");
		return ret;
	}

	notify_thread = kthread_run(notify_userspace, NULL, "wrkld_notify");
	clusters_inited = true;

	return 0;
}

@@ -678,9 +1041,13 @@ static int __init msm_performance_init(void)
	unsigned int cpu;

	cpufreq_register_notifier(&perf_cpufreq_nb, CPUFREQ_POLICY_NOTIFIER);
	cpufreq_register_notifier(&perf_govinfo_nb, CPUFREQ_GOVINFO_NOTIFIER);

	for_each_present_cpu(cpu)
		per_cpu(cpu_stats, cpu).max = UINT_MAX;

	register_cpu_notifier(&msm_performance_cpu_notifier);

	return 0;
}
late_initcall(msm_performance_init);
+31 −0
Original line number Diff line number Diff line
@@ -371,6 +371,37 @@ TRACE_EVENT(core_ctl_set_busy,
		  __entry->is_busy)
);

/*
 * Event class recording a cluster's iowait tracking state: the CPU used to
 * identify the cluster, the hysteresis cycle counter, the IO-busy flag and
 * the iowait value that was evaluated.
 */
DECLARE_EVENT_CLASS(kpm_module2,

	TP_PROTO(unsigned int cpu, unsigned int cycles, unsigned int io_busy,
								u64 iowait),

	TP_ARGS(cpu, cycles, io_busy, iowait),

	TP_STRUCT__entry(
		__field(u32, cpu)
		__field(u32, cycles)
		__field(u32, io_busy)
		__field(u64, iowait)
	),

	TP_fast_assign(
		__entry->cpu = cpu;
		__entry->cycles = cycles;
		__entry->io_busy = io_busy;
		__entry->iowait = iowait;
	),

	/* iowait is a u64: print it as %llu so 32-bit builds do not truncate */
	TP_printk("CPU:%u cycles=%u io_busy=%u iowait=%llu",
		(unsigned int)__entry->cpu, (unsigned int)__entry->cycles,
		(unsigned int)__entry->io_busy,
		(unsigned long long)__entry->iowait)
);

/* track_iowait: kpm_module2 event emitted by check_cluster_iowait() */
DEFINE_EVENT(kpm_module2, track_iowait,
	TP_PROTO(unsigned int cpu, unsigned int cycles, unsigned int io_busy,
								u64 iowait),
	TP_ARGS(cpu, cycles, io_busy, iowait)
);
#endif /* _TRACE_POWER_H */

/* This part must be outside protection */