Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 606e7bca authored by Pavankumar Kondeti
Browse files

sched: Introduce sched_freq_aggregate_threshold tunable



Do the aggregation for frequency only when the total group busy time
is above sched_freq_aggregate_threshold. This filtering is especially
needed for the cases where groups are created by including all threads
of an application process. This knob can be tuned to apply aggregation
only for heavy-workload applications.

When this knob is enabled and load is aggregated, the load is not
clipped to 100% @ current frequency to ramp up the frequency faster.

Change-Id: Icfd91c85938def101a989af3597d3dcaa8026d16
Signed-off-by: Pavankumar Kondeti <pkondeti@codeaurora.org>
parent bc8bf609
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -83,6 +83,7 @@ extern unsigned int sysctl_sched_enable_thread_grouping;
extern unsigned int sysctl_sched_new_task_windows;
extern unsigned int sysctl_sched_pred_alert_freq;
extern unsigned int sysctl_sched_freq_aggregate;
extern unsigned int sysctl_sched_freq_aggregate_threshold_pct;
#endif
#endif

+28 −11
Original line number Diff line number Diff line
@@ -3290,6 +3290,8 @@ void sched_get_cpus_busy(struct sched_load *busy,
	u64 max_prev_sum = 0;
	int max_busy_cpu = cpumask_first(query_cpus);
	struct related_thread_group *grp;
	u64 total_group_load = 0, total_ngload = 0;
	bool aggregate_load = false;

	if (unlikely(cpus == 0))
		return;
@@ -3341,6 +3343,14 @@ void sched_get_cpus_busy(struct sched_load *busy,
		}
	}

	if (!notifier_sent[max_busy_cpu]) {
		group_load_in_freq_domain(
				&cpu_rq(max_busy_cpu)->freq_domain_cpumask,
				&total_group_load, &total_ngload);
		if (total_group_load > sched_freq_aggregate_threshold)
			aggregate_load = true;
	}

	i = 0;
	for_each_cpu(cpu, query_cpus) {
		group_load[i] = 0;
@@ -3350,11 +3360,11 @@ void sched_get_cpus_busy(struct sched_load *busy,
			goto skip_early;

		rq = cpu_rq(cpu);
		if (!notifier_sent[i]) {
			if (cpu == max_busy_cpu)
				group_load_in_freq_domain(
					&rq->freq_domain_cpumask,
					&group_load[i], &ngload[i]);
		if (!notifier_sent[i] && aggregate_load) {
			if (cpu == max_busy_cpu) {
				group_load[i] = total_group_load;
				ngload[i] = total_ngload;
			}
		} else {
			_group_load_in_cpu(cpu, &group_load[i], &ngload[i]);
		}
@@ -3391,7 +3401,19 @@ skip_early:
			goto exit_early;
		}

		if (!notifier_sent[i]) {
		/*
		 * When the load aggregation is controlled by
		 * sched_freq_aggregate_threshold, allow reporting loads
		 * greater than 100 @ Fcur to ramp up the frequency
		 * faster.
		 */
		if (notifier_sent[i] || (aggregate_load &&
					sched_freq_aggregate_threshold)) {
			load[i] = scale_load_to_freq(load[i], max_freq[i],
						    cpu_max_possible_freq(cpu));
			nload[i] = scale_load_to_freq(nload[i], max_freq[i],
						    cpu_max_possible_freq(cpu));
		} else {
			load[i] = scale_load_to_freq(load[i], max_freq[i],
						     cur_freq[i]);
			nload[i] = scale_load_to_freq(nload[i], max_freq[i],
@@ -3405,11 +3427,6 @@ skip_early:
						    cpu_max_possible_freq(cpu));
			nload[i] = scale_load_to_freq(nload[i], cur_freq[i],
						    cpu_max_possible_freq(cpu));
		} else {
			load[i] = scale_load_to_freq(load[i], max_freq[i],
						    cpu_max_possible_freq(cpu));
			nload[i] = scale_load_to_freq(nload[i], max_freq[i],
						    cpu_max_possible_freq(cpu));
		}
		pload[i] = scale_load_to_freq(pload[i], max_freq[i],
					     rq->cluster->max_possible_freq);
+42 −0
Original line number Diff line number Diff line
@@ -2387,6 +2387,14 @@ unsigned int __read_mostly sysctl_sched_prefer_sync_wakee_to_waker;
unsigned int __read_mostly sched_spill_load;
unsigned int __read_mostly sysctl_sched_spill_load_pct = 100;

/*
 * frequency aggregation threshold
 */
#ifdef CONFIG_SCHED_FREQ_INPUT
unsigned int __read_mostly sysctl_sched_freq_aggregate_threshold_pct = 0;
unsigned int __read_mostly sched_freq_aggregate_threshold = 0;
#endif

/*
 * Tasks whose bandwidth consumption on a cpu is more than
 * sched_upmigrate are considered "big" tasks. Big tasks will be
@@ -2479,6 +2487,9 @@ void set_hmp_defaults(void)
#ifdef CONFIG_SCHED_FREQ_INPUT
	sched_major_task_runtime =
		mult_frac(sched_ravg_window, MAJOR_TASK_PCT, 100);

	sched_freq_aggregate_threshold =
		pct_to_real(sysctl_sched_freq_aggregate_threshold_pct);
#endif

	sched_init_task_load_pelt =
@@ -3698,11 +3709,39 @@ static inline int invalid_value_freq_input(unsigned int *data)

	return 0;
}

static inline int
handle_freq_aggregate_threshold(unsigned int *data, unsigned int old_val)
{
	/*
	 * Special handling for sched_freq_aggregate_threshold_pct
	 * which can be greater than 100. Use 1000 as an upper bound
	 * value which works for all practical use cases.
	 *
	 */
	if (data == &sysctl_sched_freq_aggregate_threshold_pct) {
		if (*data > 1000) {
			*data = old_val;
			return -EINVAL;
		}
		sched_freq_aggregate_threshold =
			pct_to_real(sysctl_sched_freq_aggregate_threshold_pct);
		return 1;
	}

	return 0;
}
#else
static inline int invalid_value_freq_input(unsigned int *data)
{
	return 0;
}

static inline int
handle_freq_aggregate_threshold(unsigned int *data, unsigned int old_val)
{
	return 0;
}
#endif

static inline int invalid_value(unsigned int *data)
@@ -3780,6 +3819,9 @@ int sched_hmp_proc_update_handler(struct ctl_table *table, int write,
	if (write && (old_val == *data))
		goto done;

	if (handle_freq_aggregate_threshold(data, old_val)) {
		goto done;
	}
	if (data == (unsigned int *)&sysctl_sched_upmigrate_min_nice) {
		if ((*(int *)data) < -20 || (*(int *)data) > 19) {
			*data = old_val;
+2 −0
Original line number Diff line number Diff line
@@ -1305,6 +1305,8 @@ static inline int same_freq_domain(int src_cpu, int dst_cpu)
	return cpumask_test_cpu(dst_cpu, &rq->freq_domain_cpumask);
}

extern unsigned int sched_freq_aggregate_threshold;

#else	/* CONFIG_SCHED_FREQ_INPUT */

#define sched_migration_fixup	0
+7 −0
Original line number Diff line number Diff line
@@ -519,6 +519,13 @@ static struct ctl_table kern_table[] = {
		.mode           = 0644,
		.proc_handler   = sched_window_update_handler,
	},
	{
		.procname	= "sched_freq_aggregate_threshold",
		.data		= &sysctl_sched_freq_aggregate_threshold_pct,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= sched_hmp_proc_update_handler,
	},
#endif
#endif
	{