Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 3f60b750 authored by qctecmdr Service's avatar qctecmdr Service Committed by Gerrit - the friendly Code Review server
Browse files

Merge "ANDROID: sched/events: Introduce util_est trace events"

parents 82db4d51 bb343c15
Loading
Loading
Loading
Loading
+26 −16
Original line number Diff line number Diff line
@@ -10,21 +10,21 @@
#include <math.h>
#include <stdio.h>

#define HALFLIFE 32
#define HALFLIFE { 32, 16, 8 }
#define SHIFT 32

double y;

void calc_runnable_avg_yN_inv(void)
void calc_runnable_avg_yN_inv(const int halflife)
{
	int i;
	unsigned int x;

	printf("static const u32 runnable_avg_yN_inv[] = {");
	for (i = 0; i < HALFLIFE; i++) {
	for (i = 0; i < halflife; i++) {
		x = ((1UL<<32)-1)*pow(y, i);

		if (i % 6 == 0) printf("\n\t");
		if (i % 4 == 0) printf("\n\t");
		printf("0x%8x, ", x);
	}
	printf("\n};\n\n");
@@ -32,12 +32,12 @@ void calc_runnable_avg_yN_inv(void)

int sum = 1024;

void calc_runnable_avg_yN_sum(void)
void calc_runnable_avg_yN_sum(const int halflife)
{
	int i;

	printf("static const u32 runnable_avg_yN_sum[] = {\n\t    0,");
	for (i = 1; i <= HALFLIFE; i++) {
	for (i = 1; i <= halflife; i++) {
		if (i == 1)
			sum *= y;
		else
@@ -55,7 +55,7 @@ int n = -1;
/* first period */
long max = 1024;

void calc_converged_max(void)
void calc_converged_max(const int halflife)
{
	long last = 0, y_inv = ((1UL<<32)-1)*y;

@@ -73,17 +73,17 @@ void calc_converged_max(void)
		last = max;
	}
	n--;
	printf("#define LOAD_AVG_PERIOD %d\n", HALFLIFE);
	printf("#define LOAD_AVG_PERIOD %d\n", halflife);
	printf("#define LOAD_AVG_MAX %ld\n", max);
//	printf("#define LOAD_AVG_MAX_N %d\n\n", n);
	printf("#define LOAD_AVG_MAX_N %d\n\n", n);
}

void calc_accumulated_sum_32(void)
void calc_accumulated_sum_32(const int halflife)
{
	int i, x = sum;

	printf("static const u32 __accumulated_sum_N32[] = {\n\t     0,");
	for (i = 1; i <= n/HALFLIFE+1; i++) {
	for (i = 1; i <= n/halflife+1; i++) {
		if (i > 1)
			x = x/2 + sum;

@@ -97,12 +97,22 @@ void calc_accumulated_sum_32(void)

void main(void)
{
	int hl_value[] = HALFLIFE;
	int hl_count = sizeof(hl_value) / sizeof(int);
	int hl_idx, halflife;

	printf("/* Generated by Documentation/scheduler/sched-pelt; do not modify. */\n\n");

	y = pow(0.5, 1/(double)HALFLIFE);
	for (hl_idx = 0; hl_idx < hl_count; ++hl_idx) {
		halflife = hl_value[hl_idx];

		y = pow(0.5, 1/(double)halflife);

	calc_runnable_avg_yN_inv();
//	calc_runnable_avg_yN_sum();
	calc_converged_max();
//	calc_accumulated_sum_32();
		printf("#if CONFIG_PELT_UTIL_HALFLIFE_%d\n", halflife);
		calc_runnable_avg_yN_inv(halflife);
		calc_runnable_avg_yN_sum(halflife);
		calc_converged_max(halflife);
		calc_accumulated_sum_32(halflife);
		printf("#endif\n\n");
	}
}
+3 −0
Original line number Diff line number Diff line
@@ -30,6 +30,9 @@ const struct cpumask *cpu_coregroup_mask(int cpu);
/* Replace task scheduler's default frequency-invariant accounting */
#define arch_scale_freq_capacity topology_get_freq_scale

/* Replace task scheduler's default max-frequency-invariant accounting */
#define arch_scale_max_freq_capacity topology_get_max_freq_scale

/* Replace task scheduler's default cpu-invariant accounting */
#define arch_scale_cpu_capacity topology_get_cpu_scale

+29 −0
Original line number Diff line number Diff line
@@ -367,6 +367,34 @@ struct load_weight {
	u32				inv_weight;
};

/**
 * struct util_est - Estimation utilization of FAIR tasks
 * @enqueued: instantaneous estimated utilization of a task/cpu
 * @ewma:     the Exponential Weighted Moving Average (EWMA)
 *            utilization of a task
 *
 * Support data structure to track an Exponential Weighted Moving Average
 * (EWMA) of a FAIR task's utilization. New samples are added to the moving
 * average each time a task completes an activation. Sample's weight is chosen
 * so that the EWMA will be relatively insensitive to transient changes to the
 * task's workload.
 *
 * The enqueued attribute has a slightly different meaning for tasks and cpus:
 * - task:   the task's util_avg at last task dequeue time
 * - cfs_rq: the sum of util_est.enqueued for each RUNNABLE task on that CPU
 * Thus, the util_est.enqueued of a task represents the contribution on the
 * estimated utilization of the CPU where that task is currently enqueued.
 *
 * Only for tasks we track a moving average of the past instantaneous
 * estimated utilization. This allows to absorb sporadic drops in utilization
 * of an otherwise almost periodic task.
 */
struct util_est {
	unsigned int			enqueued;	/* task: util_avg at last dequeue; cfs_rq: sum over RUNNABLE tasks */
	unsigned int			ewma;		/* EWMA of past enqueued samples (meaningful for tasks only) */
/*
 * Weight of a new sample folded into the EWMA: presumably
 * 1/2^UTIL_EST_WEIGHT_SHIFT (i.e. 1/4) -- confirm against the
 * util_est update code in kernel/sched/fair.c.
 */
#define UTIL_EST_WEIGHT_SHIFT		2
};

/*
 * The load_avg/util_avg accumulates an infinite geometric series
 * (see __update_load_avg() in kernel/sched/fair.c).
@@ -426,6 +454,7 @@ struct sched_avg {
	u32				period_contrib;
	unsigned long			load_avg;
	unsigned long			util_avg;
	struct util_est			util_est;
};

struct sched_statistics {
+63 −0
Original line number Diff line number Diff line
@@ -1182,6 +1182,69 @@ TRACE_EVENT(sched_find_best_target,
		__entry->backup_cpu)
);

/*
 * Tracepoint for tasks' estimated utilization.
 *
 * Records the task's identity (comm/pid/cpu), its instantaneous PELT
 * util_avg, and both util_est members (enqueued and the EWMA), so the
 * estimated utilization can be compared against the raw PELT signal.
 */
TRACE_EVENT(sched_util_est_task,

	TP_PROTO(struct task_struct *tsk, struct sched_avg *avg),

	TP_ARGS(tsk, avg),

	TP_STRUCT__entry(
		__array( char,	comm,	TASK_COMM_LEN		)
		__field( pid_t,		pid			)
		__field( int,		cpu			)
		__field( unsigned int,	util_avg		)
		__field( unsigned int,	est_enqueued		)
		__field( unsigned int,	est_ewma		)
	),

	TP_fast_assign(
		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
		__entry->pid			= tsk->pid;
		__entry->cpu                    = task_cpu(tsk);
		__entry->util_avg               = avg->util_avg;
		__entry->est_enqueued           = avg->util_est.enqueued;
		__entry->est_ewma               = avg->util_est.ewma;
	),

	TP_printk("comm=%s pid=%d cpu=%d util_avg=%u util_est_ewma=%u util_est_enqueued=%u",
		  __entry->comm,
		  __entry->pid,
		  __entry->cpu,
		  __entry->util_avg,
		  __entry->est_ewma,
		  __entry->est_enqueued)
);

/*
 * Tracepoint for root cfs_rq's estimated utilization.
 *
 * Records, for the given CPU, the root cfs_rq's PELT util_avg and its
 * util_est.enqueued (the sum of util_est.enqueued over the RUNNABLE
 * tasks on that CPU).
 */
TRACE_EVENT(sched_util_est_cpu,

	TP_PROTO(int cpu, struct cfs_rq *cfs_rq),

	TP_ARGS(cpu, cfs_rq),

	TP_STRUCT__entry(
		__field( int,		cpu			)
		__field( unsigned int,	util_avg		)
		__field( unsigned int,	util_est_enqueued	)
	),

	TP_fast_assign(
		__entry->cpu			= cpu;
		__entry->util_avg		= cfs_rq->avg.util_avg;
		__entry->util_est_enqueued	= cfs_rq->avg.util_est.enqueued;
	),

	TP_printk("cpu=%d util_avg=%u util_est_enqueued=%u",
		  __entry->cpu,
		  __entry->util_avg,
		  __entry->util_est_enqueued)
);

TRACE_EVENT(sched_cpu_util,

	TP_PROTO(int cpu),
+35 −0
Original line number Diff line number Diff line
@@ -610,6 +610,41 @@ config HAVE_UNSTABLE_SCHED_CLOCK
config GENERIC_SCHED_CLOCK
	bool

menu "FAIR Scheduler tunables"

choice
	prompt "Utilization's PELT half-Life"
	default PELT_UTIL_HALFLIFE_32
	help
	  Allows choosing one of the possible values for the PELT half-life to
	  be used for the update of the utilization of tasks and CPUs.
	  The half-life is the time, in milliseconds, required by the PELT signal to
	  build up to 50% utilization. The higher the half-life the longer it
	  takes for a task to be represented as a big one.

	  If not sure, use the default of 32 ms.

config PELT_UTIL_HALFLIFE_32
	bool "32 ms, default for server"

config PELT_UTIL_HALFLIFE_16
	bool "16 ms, suggested for interactive workloads"
	help
	  Use 16ms as PELT half-life value. This will increase the ramp-up and
	  decay of utilization and load twice as fast as for the default
	  configuration using 32ms.

config PELT_UTIL_HALFLIFE_8
	bool "8 ms, very fast"
	help
	  Use 8ms as PELT half-life value. This will increase the ramp-up and
	  decay of utilization and load four times as fast as for the default
	  configuration using 32ms.

endchoice

endmenu # FAIR Scheduler tunables

#
# For architectures that want to enable the support for NUMA-affine scheduler
# balancing logic:
Loading