Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit c669cccb authored by Satya Durga Srinivasu Prabhala's avatar Satya Durga Srinivasu Prabhala
Browse files

sched: Add migration support for multi cluster systems



On multi-cluster systems where CPUs have differing capacities, a single
migration threshold is insufficient: a task may need to be migrated to a
CPU in an adjacent cluster, and the appropriate threshold depends on
which cluster boundary is being crossed. This change accepts multiple
values for the {up,down}_migrate knobs — one per cluster boundary — so
that tasks can be migrated to a CPU in the adjacent cluster.

Change-Id: I325cc71884d9bbac14475cd838a3955d53e03d1e
Signed-off-by: default avatarSatya Durga Srinivasu Prabhala <satyap@codeaurora.org>
parent c85e6b5e
Loading
Loading
Loading
Loading
+6 −2
Original line number Diff line number Diff line
@@ -19,14 +19,18 @@ extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
enum { sysctl_hung_task_timeout_secs = 0 };
#endif

#define MAX_CLUSTERS 3
/* MAX_MARGIN_LEVELS should be one less than MAX_CLUSTERS */
#define MAX_MARGIN_LEVELS (MAX_CLUSTERS - 1)

extern unsigned int sysctl_sched_latency;
extern unsigned int sysctl_sched_min_granularity;
extern unsigned int sysctl_sched_sync_hint_enable;
extern unsigned int sysctl_sched_cstate_aware;
extern unsigned int sysctl_sched_wakeup_granularity;
extern unsigned int sysctl_sched_child_runs_first;
extern unsigned int sysctl_sched_capacity_margin_up;
extern unsigned int sysctl_sched_capacity_margin_down;
extern unsigned int sysctl_sched_capacity_margin_up[MAX_MARGIN_LEVELS];
extern unsigned int sysctl_sched_capacity_margin_down[MAX_MARGIN_LEVELS];
#ifdef CONFIG_SCHED_WALT
extern unsigned int sysctl_sched_use_walt_cpu_util;
extern unsigned int sysctl_sched_use_walt_task_util;
+108 −5
Original line number Diff line number Diff line
@@ -6707,24 +6707,127 @@ void sched_move_task(struct task_struct *tsk)
}

#ifdef CONFIG_PROC_SYSCTL
static int find_capacity_margin_levels(void)
{
	int cluster_cnt = 0;
	int cpu = 0;

	/*
	 * Walk the possible CPUs one core-sibling group (cluster) at a
	 * time, counting how many clusters exist.
	 */
	while (cpu < num_possible_cpus()) {
		cpu += cpumask_weight(topology_core_cpumask(cpu));
		cluster_cnt++;
	}

	/*
	 * The number of capacity margin levels is one less than the
	 * number of clusters present in the system.
	 */
	return cluster_cnt - 1;
}

/*
 * Propagate the per-level sysctl up-migrate margins into the
 * per-CPU sched_capacity_margin_up[] array.
 */
static void sched_update_up_migrate_values(int cap_margin_levels,
				const struct cpumask *cluster_cpus[])
{
	int level, cpu;

	if (cap_margin_levels <= 1) {
		/* Single margin level: apply it to every CPU. */
		for_each_possible_cpu(cpu)
			sched_capacity_margin_up[cpu] =
				sysctl_sched_capacity_margin_up[0];
		return;
	}

	/*
	 * More than two clusters: CPUs in the last cluster have no
	 * up-migration target, so only the first cap_margin_levels
	 * clusters are updated.
	 */
	for (level = 0; level < cap_margin_levels; level++) {
		if (!cluster_cpus[level])
			continue;
		for_each_cpu(cpu, cluster_cpus[level])
			sched_capacity_margin_up[cpu] =
				sysctl_sched_capacity_margin_up[level];
	}
}

/*
 * Propagate the per-level sysctl down-migrate margins into the
 * per-CPU sched_capacity_margin_down[] array.
 */
static void sched_update_down_migrate_values(int cap_margin_levels,
				const struct cpumask *cluster_cpus[])
{
	int level, cpu;

	if (cap_margin_levels <= 1) {
		/* Single margin level: apply it to every CPU. */
		for_each_possible_cpu(cpu)
			sched_capacity_margin_down[cpu] =
				sysctl_sched_capacity_margin_down[0];
		return;
	}

	/*
	 * The first cluster never down-migrates, so margin level
	 * `level` applies to the CPUs of cluster `level + 1`.
	 */
	for (level = 0; level < cap_margin_levels; level++) {
		if (!cluster_cpus[level + 1])
			continue;
		for_each_cpu(cpu, cluster_cpus[level + 1])
			sched_capacity_margin_down[cpu] =
				sysctl_sched_capacity_margin_down[level];
	}
}

/*
 * Dispatch a sysctl write of the up/down migrate margins to the
 * matching per-CPU update helper.
 *
 * @data:              pointer to the sysctl array that was written
 *                     (&sysctl_sched_capacity_margin_up[0] or
 *                      &sysctl_sched_capacity_margin_down[0])
 * @cap_margin_levels: number of margin levels (clusters - 1)
 * @ret:               status from the caller, passed through on success
 *
 * Returns @ret, or -EINVAL if @data matches neither sysctl array.
 */
static int sched_update_updown_migrate_values(unsigned int *data,
					int cap_margin_levels, int ret)
{
	int i, cpu;
	static const struct cpumask *cluster_cpus[MAX_CLUSTERS];

	/*
	 * Lazily cache one representative cpumask per cluster.  The
	 * static array persists across sysctl writes, so the topology
	 * walk only runs until every slot is filled.  Bound i by
	 * MAX_CLUSTERS so a system exposing more clusters than
	 * expected cannot index past the end of cluster_cpus[]
	 * (the original loop had no such bound).
	 */
	for (i = cpu = 0; i < MAX_CLUSTERS && !cluster_cpus[i] &&
				cpu < num_possible_cpus(); i++) {
		cluster_cpus[i] = topology_core_cpumask(cpu);
		cpu += cpumask_weight(topology_core_cpumask(cpu));
	}

	if (data == &sysctl_sched_capacity_margin_up[0])
		sched_update_up_migrate_values(cap_margin_levels,
							cluster_cpus);
	else if (data == &sysctl_sched_capacity_margin_down[0])
		sched_update_down_migrate_values(cap_margin_levels,
							cluster_cpus);
	else
		ret = -EINVAL;

	return ret;
}

int sched_updown_migrate_handler(struct ctl_table *table, int write,
				 void __user *buffer, size_t *lenp,
				 loff_t *ppos)
{
	int ret;
	int ret, i;
	unsigned int *data = (unsigned int *)table->data;
	unsigned int old_val;
	static DEFINE_MUTEX(mutex);
	static int cap_margin_levels = -1;

	mutex_lock(&mutex);
	old_val = *data;

	if (cap_margin_levels == -1 ||
		table->maxlen != (sizeof(unsigned int) * cap_margin_levels)) {
		cap_margin_levels = find_capacity_margin_levels();
		table->maxlen = sizeof(unsigned int) * cap_margin_levels;
	}

	if (cap_margin_levels <= 0) {
		mutex_unlock(&mutex);
		return -EINVAL;
	}

	ret = proc_douintvec_capacity(table, write, buffer, lenp, ppos);

	if (!ret && write && sysctl_sched_capacity_margin_up >
				sysctl_sched_capacity_margin_down) {
		ret = -EINVAL;
	if (!ret && write) {
		for (i = 0; i < cap_margin_levels; i++) {
			if (sysctl_sched_capacity_margin_up[i] >
					sysctl_sched_capacity_margin_down[i]) {
				*data = old_val;
				mutex_unlock(&mutex);
				return -EINVAL;
			}
		}

		ret = sched_update_updown_migrate_values(data,
						cap_margin_levels, ret);
	}
	mutex_unlock(&mutex);

+14 −4
Original line number Diff line number Diff line
@@ -183,8 +183,15 @@ unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL;
 */
unsigned int capacity_margin				= 1280;

unsigned int sysctl_sched_capacity_margin_up = 1078; /* ~5% margin */
unsigned int sysctl_sched_capacity_margin_down = 1205; /* ~15% margin */
/* Migration margins */
unsigned int sysctl_sched_capacity_margin_up[MAX_MARGIN_LEVELS] = {
			[0 ... MAX_MARGIN_LEVELS-1] = 1078}; /* ~5% margin */
unsigned int sysctl_sched_capacity_margin_down[MAX_MARGIN_LEVELS] = {
			[0 ... MAX_MARGIN_LEVELS-1] = 1205}; /* ~15% margin */
unsigned int sched_capacity_margin_up[NR_CPUS] = {
			[0 ... NR_CPUS-1] = 1078}; /* ~5% margin */
unsigned int sched_capacity_margin_down[NR_CPUS] = {
			[0 ... NR_CPUS-1] = 1205}; /* ~15% margin */

static inline void update_load_add(struct load_weight *lw, unsigned long inc)
{
@@ -6925,10 +6932,13 @@ static inline int task_fits_capacity(struct task_struct *p,
{
	unsigned int margin;

	if (capacity == max_capacity)
		return true;

	if (capacity_orig_of(task_cpu(p)) > capacity_orig_of(cpu))
		margin = sysctl_sched_capacity_margin_down;
		margin = sched_capacity_margin_down[task_cpu(p)];
	else
		margin = sysctl_sched_capacity_margin_up;
		margin = sysctl_sched_capacity_margin_up[task_cpu(p)];

	return capacity * 1024 > boosted_task_util(p) * margin;
}
+2 −0
Original line number Diff line number Diff line
@@ -49,6 +49,8 @@ struct rq;
struct cpuidle_state;

extern __read_mostly bool sched_predl;
extern unsigned int sched_capacity_margin_up[NR_CPUS];
extern unsigned int sched_capacity_margin_down[NR_CPUS];

#ifdef CONFIG_SCHED_WALT
extern unsigned int sched_ravg_window;
+2 −2
Original line number Diff line number Diff line
@@ -349,14 +349,14 @@ static struct ctl_table kern_table[] = {
	{
		.procname	= "sched_upmigrate",
		.data		= &sysctl_sched_capacity_margin_up,
		.maxlen		= sizeof(unsigned int),
		.maxlen		= sizeof(unsigned int) * MAX_MARGIN_LEVELS,
		.mode		= 0644,
		.proc_handler	= sched_updown_migrate_handler,
	},
	{
		.procname	= "sched_downmigrate",
		.data		= &sysctl_sched_capacity_margin_down,
		.maxlen		= sizeof(unsigned int),
		.maxlen		= sizeof(unsigned int) * MAX_MARGIN_LEVELS,
		.mode		= 0644,
		.proc_handler	= sched_updown_migrate_handler,
	},