Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 5b51f2f8 authored by Paul Turner's avatar Paul Turner Committed by Ingo Molnar
Browse files

sched: Make __update_entity_runnable_avg() fast



__update_entity_runnable_avg forms the core of maintaining an entity's runnable
load average.  In this function we charge the accumulated run-time since last
update and handle appropriate decay.  In some cases, e.g. a waking task, this
time interval may be much larger than our period unit.

Fortunately we can exploit some properties of our series to perform decay for a
blocked update in constant time and account the contribution for a running
update in essentially-constant* time.

[*]: For any running entity they should be performing updates at the tick which
gives us a soft limit of 1 jiffy between updates, and we can compute up to a
32 jiffy update in a single pass.

C program to generate the magic constants in the arrays:

  #include <math.h>
  #include <stdio.h>

  #define N 32
  #define WMULT_SHIFT 32

  const long WMULT_CONST = ((1UL << N) - 1);
  double y;

  long runnable_avg_yN_inv[N];
  void calc_mult_inv() {
  	int i;
  	double yn = 0;

  	printf("inverses\n");
  	for (i = 0; i < N; i++) {
  		yn = (double)WMULT_CONST * pow(y, i);
  		runnable_avg_yN_inv[i] = yn;
  		printf("%2d: 0x%8lx\n", i, runnable_avg_yN_inv[i]);
  	}
  	printf("\n");
  }

  long mult_inv(long c, int n) {
  	return (c * runnable_avg_yN_inv[n]) >>  WMULT_SHIFT;
  }

  void calc_yn_sum(int n)
  {
  	int i;
  	double sum = 0, sum_fl = 0, diff = 0;

  	/*
  	 * We take the floored sum to ensure the sum of partial sums is never
  	 * larger than the actual sum.
  	 */
  	printf("sum y^n\n");
  	printf("   %8s  %8s %8s\n", "exact", "floor", "error");
  	for (i = 1; i <= n; i++) {
  		sum = (y * sum + y * 1024);
  		sum_fl = floor(y * sum_fl+ y * 1024);
  		printf("%2d: %8.0f  %8.0f %8.0f\n", i, sum, sum_fl,
  			sum_fl - sum);
  	}
  	printf("\n");
  }

  void calc_conv(long n) {
  	long old_n;
  	int i = -1;

  	printf("convergence (LOAD_AVG_MAX, LOAD_AVG_MAX_N)\n");
  	do {
  		old_n = n;
  		n = mult_inv(n, 1) + 1024;
  		i++;
  	} while (n != old_n);
  	printf("%d> %ld\n", i - 1, n);
  	printf("\n");
  }

  void main() {
  	y = pow(0.5, 1/(double)N);
  	calc_mult_inv();
  	calc_conv(1024);
  	calc_yn_sum(N);
  }

[ Compile with -lm ]
Signed-off-by: default avatarPaul Turner <pjt@google.com>
Reviewed-by: default avatarBen Segall <bsegall@google.com>
Signed-off-by: default avatarPeter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20120823141507.277808946@google.com


Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
parent f269ae04
Loading
Loading
Loading
Loading
+101 −24
Original line number Diff line number Diff line
@@ -883,18 +883,93 @@ static inline void update_cfs_shares(struct cfs_rq *cfs_rq)
#endif /* CONFIG_FAIR_GROUP_SCHED */

#ifdef CONFIG_SMP
/*
 * We choose a half-life close to 1 scheduling period.
 * Note: The tables below are dependent on this value.
 */
#define LOAD_AVG_PERIOD 32
#define LOAD_AVG_MAX 47742 /* maximum possible load avg */
#define LOAD_AVG_MAX_N 345 /* number of full periods to produce LOAD_MAX_AVG */

/* Precomputed fixed inverse multiplies for multiplication by y^n */
static const u32 runnable_avg_yN_inv[] = {
	0xffffffff, 0xfa83b2da, 0xf5257d14, 0xefe4b99a, 0xeac0c6e6, 0xe5b906e6,
	0xe0ccdeeb, 0xdbfbb796, 0xd744fcc9, 0xd2a81d91, 0xce248c14, 0xc9b9bd85,
	0xc5672a10, 0xc12c4cc9, 0xbd08a39e, 0xb8fbaf46, 0xb504f333, 0xb123f581,
	0xad583ee9, 0xa9a15ab4, 0xa5fed6a9, 0xa2704302, 0x9ef5325f, 0x9b8d39b9,
	0x9837f050, 0x94f4efa8, 0x91c3d373, 0x8ea4398a, 0x8b95c1e3, 0x88980e80,
	0x85aac367, 0x82cd8698,
};

/*
 * Precomputed \Sum y^k { 1<=k<=n }.  These are floor(true_value) to prevent
 * over-estimates when re-combining.
 */
static const u32 runnable_avg_yN_sum[] = {
	    0, 1002, 1982, 2941, 3880, 4798, 5697, 6576, 7437, 8279, 9103,
	 9909,10698,11470,12226,12966,13690,14398,15091,15769,16433,17082,
	17718,18340,18949,19545,20128,20698,21256,21802,22336,22859,23371,
};

/*
 * Approximate:
 *   val * y^n,    where y^32 ~= 0.5 (~1 scheduling period)
 */
static __always_inline u64 decay_load(u64 val, u64 n)
{
	for (; n && val; n--) {
		val *= 4008;
		val >>= 12;
	}
	unsigned int local_n;

	if (!n)
		return val;
	else if (unlikely(n > LOAD_AVG_PERIOD * 63))
		return 0;

	/* after bounds checking we can collapse to 32-bit */
	local_n = n;

	/*
	 * As y^PERIOD = 1/2, we can combine
	 *    y^n = 1/2^(n/PERIOD) * k^(n%PERIOD)
	 * With a look-up table which covers k^n (n<PERIOD)
	 *
	 * To achieve constant time decay_load.
	 */
	if (unlikely(local_n >= LOAD_AVG_PERIOD)) {
		val >>= local_n / LOAD_AVG_PERIOD;
		local_n %= LOAD_AVG_PERIOD;
	}

	val *= runnable_avg_yN_inv[local_n];
	/* We don't use SRR here since we always want to round down. */
	return val >> 32;
}

/*
 * For updates fully spanning n periods, the contribution to runnable
 * average will be: \Sum 1024*y^n
 *
 * We can compute this reasonably efficiently by combining:
 *   y^PERIOD = 1/2 with precomputed \Sum 1024*y^n {for  n <PERIOD}
 */
static u32 __compute_runnable_contrib(u64 n)
{
	u32 contrib = 0;

	if (likely(n <= LOAD_AVG_PERIOD))
		return runnable_avg_yN_sum[n];
	else if (unlikely(n >= LOAD_AVG_MAX_N))
		return LOAD_AVG_MAX;

	/* Compute \Sum k^n combining precomputed values for k^i, \Sum k^j */
	do {
		contrib /= 2; /* y^LOAD_AVG_PERIOD = 1/2 */
		contrib += runnable_avg_yN_sum[LOAD_AVG_PERIOD];

		n -= LOAD_AVG_PERIOD;
	} while (n > LOAD_AVG_PERIOD);

	contrib = decay_load(contrib, n);
	return contrib + runnable_avg_yN_sum[n];
}

/*
@@ -929,7 +1004,8 @@ static __always_inline int __update_entity_runnable_avg(u64 now,
							struct sched_avg *sa,
							int runnable)
{
	u64 delta;
	u64 delta, periods;
	u32 runnable_contrib;
	int delta_w, decayed = 0;

	delta = now - sa->last_runnable_update;
@@ -963,25 +1039,26 @@ static __always_inline int __update_entity_runnable_avg(u64 now,
		 * period and accrue it.
		 */
		delta_w = 1024 - delta_w;
		BUG_ON(delta_w > delta);
		do {
		if (runnable)
			sa->runnable_avg_sum += delta_w;
		sa->runnable_avg_period += delta_w;

			/*
			 * Remainder of delta initiates a new period, roll over
			 * the previous.
			 */
			sa->runnable_avg_sum =
				decay_load(sa->runnable_avg_sum, 1);
			sa->runnable_avg_period =
				decay_load(sa->runnable_avg_period, 1);

		delta -= delta_w;
			/* New period is empty */
			delta_w = 1024;
		} while (delta >= 1024);

		/* Figure out how many additional periods this update spans */
		periods = delta / 1024;
		delta %= 1024;

		sa->runnable_avg_sum = decay_load(sa->runnable_avg_sum,
						  periods + 1);
		sa->runnable_avg_period = decay_load(sa->runnable_avg_period,
						     periods + 1);

		/* Efficiently calculate \sum (1..n_period) 1024*y^i */
		runnable_contrib = __compute_runnable_contrib(periods);
		if (runnable)
			sa->runnable_avg_sum += runnable_contrib;
		sa->runnable_avg_period += runnable_contrib;
	}

	/* Remainder of delta accrued against u_0` */