Loading Documentation/scheduler/sched-pelt.c +26 −16 Original line number Diff line number Diff line Loading @@ -10,21 +10,21 @@ #include <math.h> #include <stdio.h> #define HALFLIFE 32 #define HALFLIFE { 32, 16, 8 } #define SHIFT 32 double y; void calc_runnable_avg_yN_inv(void) void calc_runnable_avg_yN_inv(const int halflife) { int i; unsigned int x; printf("static const u32 runnable_avg_yN_inv[] = {"); for (i = 0; i < HALFLIFE; i++) { for (i = 0; i < halflife; i++) { x = ((1UL<<32)-1)*pow(y, i); if (i % 6 == 0) printf("\n\t"); if (i % 4 == 0) printf("\n\t"); printf("0x%8x, ", x); } printf("\n};\n\n"); Loading @@ -32,12 +32,12 @@ void calc_runnable_avg_yN_inv(void) int sum = 1024; void calc_runnable_avg_yN_sum(void) void calc_runnable_avg_yN_sum(const int halflife) { int i; printf("static const u32 runnable_avg_yN_sum[] = {\n\t 0,"); for (i = 1; i <= HALFLIFE; i++) { for (i = 1; i <= halflife; i++) { if (i == 1) sum *= y; else Loading @@ -55,7 +55,7 @@ int n = -1; /* first period */ long max = 1024; void calc_converged_max(void) void calc_converged_max(const int halflife) { long last = 0, y_inv = ((1UL<<32)-1)*y; Loading @@ -73,17 +73,17 @@ void calc_converged_max(void) last = max; } n--; printf("#define LOAD_AVG_PERIOD %d\n", HALFLIFE); printf("#define LOAD_AVG_PERIOD %d\n", halflife); printf("#define LOAD_AVG_MAX %ld\n", max); // printf("#define LOAD_AVG_MAX_N %d\n\n", n); printf("#define LOAD_AVG_MAX_N %d\n\n", n); } void calc_accumulated_sum_32(void) void calc_accumulated_sum_32(const int halflife) { int i, x = sum; printf("static const u32 __accumulated_sum_N32[] = {\n\t 0,"); for (i = 1; i <= n/HALFLIFE+1; i++) { for (i = 1; i <= n/halflife+1; i++) { if (i > 1) x = x/2 + sum; Loading @@ -97,12 +97,22 @@ void calc_accumulated_sum_32(void) void main(void) { int hl_value[] = HALFLIFE; int hl_count = sizeof(hl_value) / sizeof(int); int hl_idx, halflife; printf("/* Generated by Documentation/scheduler/sched-pelt; do not modify. 
*/\n\n"); y = pow(0.5, 1/(double)HALFLIFE); for (hl_idx = 0; hl_idx < hl_count; ++hl_idx) { halflife = hl_value[hl_idx]; y = pow(0.5, 1/(double)halflife); calc_runnable_avg_yN_inv(); // calc_runnable_avg_yN_sum(); calc_converged_max(); // calc_accumulated_sum_32(); printf("#if CONFIG_PELT_UTIL_HALFLIFE_%d\n", halflife); calc_runnable_avg_yN_inv(halflife); calc_runnable_avg_yN_sum(halflife); calc_converged_max(halflife); calc_accumulated_sum_32(halflife); printf("#endif\n\n"); } } arch/arm/include/asm/topology.h +3 −0 Original line number Diff line number Diff line Loading @@ -30,6 +30,9 @@ const struct cpumask *cpu_coregroup_mask(int cpu); /* Replace task scheduler's default frequency-invariant accounting */ #define arch_scale_freq_capacity topology_get_freq_scale /* Replace task scheduler's default max-frequency-invariant accounting */ #define arch_scale_max_freq_capacity topology_get_max_freq_scale /* Replace task scheduler's default cpu-invariant accounting */ #define arch_scale_cpu_capacity topology_get_cpu_scale Loading include/linux/sched.h +29 −0 Original line number Diff line number Diff line Loading @@ -367,6 +367,34 @@ struct load_weight { u32 inv_weight; }; /** * struct util_est - Estimation utilization of FAIR tasks * @enqueued: instantaneous estimated utilization of a task/cpu * @ewma: the Exponential Weighted Moving Average (EWMA) * utilization of a task * * Support data structure to track an Exponential Weighted Moving Average * (EWMA) of a FAIR task's utilization. New samples are added to the moving * average each time a task completes an activation. Sample's weight is chosen * so that the EWMA will be relatively insensitive to transient changes to the * task's workload. 
* * The enqueued attribute has a slightly different meaning for tasks and cpus: * - task: the task's util_avg at last task dequeue time * - cfs_rq: the sum of util_est.enqueued for each RUNNABLE task on that CPU * Thus, the util_est.enqueued of a task represents the contribution on the * estimated utilization of the CPU where that task is currently enqueued. * * Only for tasks we track a moving average of the past instantaneous * estimated utilization. This allows to absorb sporadic drops in utilization * of an otherwise almost periodic task. */ struct util_est { unsigned int enqueued; unsigned int ewma; #define UTIL_EST_WEIGHT_SHIFT 2 }; /* * The load_avg/util_avg accumulates an infinite geometric series * (see __update_load_avg() in kernel/sched/fair.c). Loading Loading @@ -426,6 +454,7 @@ struct sched_avg { u32 period_contrib; unsigned long load_avg; unsigned long util_avg; struct util_est util_est; }; struct sched_statistics { Loading include/trace/events/sched.h +63 −0 Original line number Diff line number Diff line Loading @@ -1182,6 +1182,69 @@ TRACE_EVENT(sched_find_best_target, __entry->backup_cpu) ); /* * Tracepoint for tasks' estimated utilization. */ TRACE_EVENT(sched_util_est_task, TP_PROTO(struct task_struct *tsk, struct sched_avg *avg), TP_ARGS(tsk, avg), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) __field( pid_t, pid ) __field( int, cpu ) __field( unsigned int, util_avg ) __field( unsigned int, est_enqueued ) __field( unsigned int, est_ewma ) ), TP_fast_assign( memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); __entry->pid = tsk->pid; __entry->cpu = task_cpu(tsk); __entry->util_avg = avg->util_avg; __entry->est_enqueued = avg->util_est.enqueued; __entry->est_ewma = avg->util_est.ewma; ), TP_printk("comm=%s pid=%d cpu=%d util_avg=%u util_est_ewma=%u util_est_enqueued=%u", __entry->comm, __entry->pid, __entry->cpu, __entry->util_avg, __entry->est_ewma, __entry->est_enqueued) ); /* * Tracepoint for root cfs_rq's estimated utilization. 
 */ TRACE_EVENT(sched_util_est_cpu, TP_PROTO(int cpu, struct cfs_rq *cfs_rq), TP_ARGS(cpu, cfs_rq), TP_STRUCT__entry( __field( int, cpu ) __field( unsigned int, util_avg ) __field( unsigned int, util_est_enqueued ) ), TP_fast_assign( __entry->cpu = cpu; __entry->util_avg = cfs_rq->avg.util_avg; __entry->util_est_enqueued = cfs_rq->avg.util_est.enqueued; ), TP_printk("cpu=%d util_avg=%u util_est_enqueued=%u", __entry->cpu, __entry->util_avg, __entry->util_est_enqueued) ); TRACE_EVENT(sched_cpu_util, TP_PROTO(int cpu), Loading init/Kconfig +35 −0 Original line number Diff line number Diff line Loading @@ -610,6 +610,41 @@ config HAVE_UNSTABLE_SCHED_CLOCK config GENERIC_SCHED_CLOCK bool menu "FAIR Scheduler tunables" choice prompt "Utilization's PELT half-life" default PELT_UTIL_HALFLIFE_32 help Allows choosing one of the possible values for the PELT half-life to be used for the update of the utilization of tasks and CPUs. The half-life is the amount of [ms] required by the PELT signal to build up to 50% utilization. The higher the half-life the longer it takes for a task to be represented as a big one. If not sure, use the default of 32 ms. config PELT_UTIL_HALFLIFE_32 bool "32 ms, default for server" config PELT_UTIL_HALFLIFE_16 bool "16 ms, suggested for interactive workloads" help Use 16ms as PELT half-life value. This will increase the ramp-up and decay of utilization and load twice as fast as for the default configuration using 32ms. config PELT_UTIL_HALFLIFE_8 bool "8 ms, very fast" help Use 8ms as PELT half-life value. This will increase the ramp-up and decay of utilization and load four times as fast as for the default configuration using 32ms. endchoice endmenu # FAIR Scheduler tunables # # For architectures that want to enable the support for NUMA-affine scheduler # balancing logic: Loading Loading
Documentation/scheduler/sched-pelt.c +26 −16 Original line number Diff line number Diff line Loading @@ -10,21 +10,21 @@ #include <math.h> #include <stdio.h> #define HALFLIFE 32 #define HALFLIFE { 32, 16, 8 } #define SHIFT 32 double y; void calc_runnable_avg_yN_inv(void) void calc_runnable_avg_yN_inv(const int halflife) { int i; unsigned int x; printf("static const u32 runnable_avg_yN_inv[] = {"); for (i = 0; i < HALFLIFE; i++) { for (i = 0; i < halflife; i++) { x = ((1UL<<32)-1)*pow(y, i); if (i % 6 == 0) printf("\n\t"); if (i % 4 == 0) printf("\n\t"); printf("0x%8x, ", x); } printf("\n};\n\n"); Loading @@ -32,12 +32,12 @@ void calc_runnable_avg_yN_inv(void) int sum = 1024; void calc_runnable_avg_yN_sum(void) void calc_runnable_avg_yN_sum(const int halflife) { int i; printf("static const u32 runnable_avg_yN_sum[] = {\n\t 0,"); for (i = 1; i <= HALFLIFE; i++) { for (i = 1; i <= halflife; i++) { if (i == 1) sum *= y; else Loading @@ -55,7 +55,7 @@ int n = -1; /* first period */ long max = 1024; void calc_converged_max(void) void calc_converged_max(const int halflife) { long last = 0, y_inv = ((1UL<<32)-1)*y; Loading @@ -73,17 +73,17 @@ void calc_converged_max(void) last = max; } n--; printf("#define LOAD_AVG_PERIOD %d\n", HALFLIFE); printf("#define LOAD_AVG_PERIOD %d\n", halflife); printf("#define LOAD_AVG_MAX %ld\n", max); // printf("#define LOAD_AVG_MAX_N %d\n\n", n); printf("#define LOAD_AVG_MAX_N %d\n\n", n); } void calc_accumulated_sum_32(void) void calc_accumulated_sum_32(const int halflife) { int i, x = sum; printf("static const u32 __accumulated_sum_N32[] = {\n\t 0,"); for (i = 1; i <= n/HALFLIFE+1; i++) { for (i = 1; i <= n/halflife+1; i++) { if (i > 1) x = x/2 + sum; Loading @@ -97,12 +97,22 @@ void calc_accumulated_sum_32(void) void main(void) { int hl_value[] = HALFLIFE; int hl_count = sizeof(hl_value) / sizeof(int); int hl_idx, halflife; printf("/* Generated by Documentation/scheduler/sched-pelt; do not modify. 
*/\n\n"); y = pow(0.5, 1/(double)HALFLIFE); for (hl_idx = 0; hl_idx < hl_count; ++hl_idx) { halflife = hl_value[hl_idx]; y = pow(0.5, 1/(double)halflife); calc_runnable_avg_yN_inv(); // calc_runnable_avg_yN_sum(); calc_converged_max(); // calc_accumulated_sum_32(); printf("#if CONFIG_PELT_UTIL_HALFLIFE_%d\n", halflife); calc_runnable_avg_yN_inv(halflife); calc_runnable_avg_yN_sum(halflife); calc_converged_max(halflife); calc_accumulated_sum_32(halflife); printf("#endif\n\n"); } }
arch/arm/include/asm/topology.h +3 −0 Original line number Diff line number Diff line Loading @@ -30,6 +30,9 @@ const struct cpumask *cpu_coregroup_mask(int cpu); /* Replace task scheduler's default frequency-invariant accounting */ #define arch_scale_freq_capacity topology_get_freq_scale /* Replace task scheduler's default max-frequency-invariant accounting */ #define arch_scale_max_freq_capacity topology_get_max_freq_scale /* Replace task scheduler's default cpu-invariant accounting */ #define arch_scale_cpu_capacity topology_get_cpu_scale Loading
include/linux/sched.h +29 −0 Original line number Diff line number Diff line Loading @@ -367,6 +367,34 @@ struct load_weight { u32 inv_weight; }; /** * struct util_est - Estimation utilization of FAIR tasks * @enqueued: instantaneous estimated utilization of a task/cpu * @ewma: the Exponential Weighted Moving Average (EWMA) * utilization of a task * * Support data structure to track an Exponential Weighted Moving Average * (EWMA) of a FAIR task's utilization. New samples are added to the moving * average each time a task completes an activation. Sample's weight is chosen * so that the EWMA will be relatively insensitive to transient changes to the * task's workload. * * The enqueued attribute has a slightly different meaning for tasks and cpus: * - task: the task's util_avg at last task dequeue time * - cfs_rq: the sum of util_est.enqueued for each RUNNABLE task on that CPU * Thus, the util_est.enqueued of a task represents the contribution on the * estimated utilization of the CPU where that task is currently enqueued. * * Only for tasks we track a moving average of the past instantaneous * estimated utilization. This allows to absorb sporadic drops in utilization * of an otherwise almost periodic task. */ struct util_est { unsigned int enqueued; unsigned int ewma; #define UTIL_EST_WEIGHT_SHIFT 2 }; /* * The load_avg/util_avg accumulates an infinite geometric series * (see __update_load_avg() in kernel/sched/fair.c). Loading Loading @@ -426,6 +454,7 @@ struct sched_avg { u32 period_contrib; unsigned long load_avg; unsigned long util_avg; struct util_est util_est; }; struct sched_statistics { Loading
include/trace/events/sched.h +63 −0 Original line number Diff line number Diff line Loading @@ -1182,6 +1182,69 @@ TRACE_EVENT(sched_find_best_target, __entry->backup_cpu) ); /* * Tracepoint for tasks' estimated utilization. */ TRACE_EVENT(sched_util_est_task, TP_PROTO(struct task_struct *tsk, struct sched_avg *avg), TP_ARGS(tsk, avg), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) __field( pid_t, pid ) __field( int, cpu ) __field( unsigned int, util_avg ) __field( unsigned int, est_enqueued ) __field( unsigned int, est_ewma ) ), TP_fast_assign( memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); __entry->pid = tsk->pid; __entry->cpu = task_cpu(tsk); __entry->util_avg = avg->util_avg; __entry->est_enqueued = avg->util_est.enqueued; __entry->est_ewma = avg->util_est.ewma; ), TP_printk("comm=%s pid=%d cpu=%d util_avg=%u util_est_ewma=%u util_est_enqueued=%u", __entry->comm, __entry->pid, __entry->cpu, __entry->util_avg, __entry->est_ewma, __entry->est_enqueued) ); /* * Tracepoint for root cfs_rq's estimated utilization. */ TRACE_EVENT(sched_util_est_cpu, TP_PROTO(int cpu, struct cfs_rq *cfs_rq), TP_ARGS(cpu, cfs_rq), TP_STRUCT__entry( __field( int, cpu ) __field( unsigned int, util_avg ) __field( unsigned int, util_est_enqueued ) ), TP_fast_assign( __entry->cpu = cpu; __entry->util_avg = cfs_rq->avg.util_avg; __entry->util_est_enqueued = cfs_rq->avg.util_est.enqueued; ), TP_printk("cpu=%d util_avg=%u util_est_enqueued=%u", __entry->cpu, __entry->util_avg, __entry->util_est_enqueued) ); TRACE_EVENT(sched_cpu_util, TP_PROTO(int cpu), Loading
init/Kconfig +35 −0 Original line number Diff line number Diff line Loading @@ -610,6 +610,41 @@ config HAVE_UNSTABLE_SCHED_CLOCK config GENERIC_SCHED_CLOCK bool menu "FAIR Scheduler tunables" choice prompt "Utilization's PELT half-life" default PELT_UTIL_HALFLIFE_32 help Allows choosing one of the possible values for the PELT half-life to be used for the update of the utilization of tasks and CPUs. The half-life is the amount of [ms] required by the PELT signal to build up to 50% utilization. The higher the half-life the longer it takes for a task to be represented as a big one. If not sure, use the default of 32 ms. config PELT_UTIL_HALFLIFE_32 bool "32 ms, default for server" config PELT_UTIL_HALFLIFE_16 bool "16 ms, suggested for interactive workloads" help Use 16ms as PELT half-life value. This will increase the ramp-up and decay of utilization and load twice as fast as for the default configuration using 32ms. config PELT_UTIL_HALFLIFE_8 bool "8 ms, very fast" help Use 8ms as PELT half-life value. This will increase the ramp-up and decay of utilization and load four times as fast as for the default configuration using 32ms. endchoice endmenu # FAIR Scheduler tunables # # For architectures that want to enable the support for NUMA-affine scheduler # balancing logic: Loading