
Commit ffda12a1 authored by Peter Zijlstra, committed by Ingo Molnar

sched: optimize group load balancer



I noticed that tg_shares_up() unconditionally takes rq-locks for all cpus
in the sched_domain. This hurts.

We need the rq-locks whenever we change the weight of the per-cpu group sched
entities. To alleviate this a little, only change the weight when the new
weight is at least shares_thresh away from the old value.

This avoids the rq-lock for the top level entries, since those will never
be re-weighted, and fuzzes the lower level entries a little to gain performance
in semi-stable situations.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent b0aa51b9
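
The gist of the change, sketched below outside the kernel: compute the new weight, and only take the (possibly remote) lock and apply it when the result differs from the currently applied weight by more than the threshold. The sketch is illustrative only; group_entity, update_weight and the pthread mutex standing in for the per-cpu rq->lock are invented names, not the kernel API.

#include <pthread.h>
#include <stdlib.h>

/* Illustrative stand-in for a per-cpu group sched entity. */
struct group_entity {
	pthread_mutex_t lock;	/* plays the role of the remote rq->lock */
	unsigned long weight;	/* currently applied shares */
};

static unsigned long shares_thresh = 4;	/* mirrors sysctl_sched_shares_thresh */

/*
 * Only acquire the (possibly remote) lock and rewrite the weight when the
 * newly computed value is more than shares_thresh away from the old one;
 * small fluctuations are ignored, trading a bit of fairness for fewer
 * lock acquisitions.  As in the patch, the comparison happens before the
 * lock is taken.
 */
static void update_weight(struct group_entity *ge, unsigned long new_weight)
{
	if (labs((long)new_weight - (long)ge->weight) <= (long)shares_thresh)
		return;

	pthread_mutex_lock(&ge->lock);
	ge->weight = new_weight;
	pthread_mutex_unlock(&ge->lock);
}

Because the top-level entities always compute the same weight, their delta is zero and they never take the lock; lower-level entities only pay for it once their shares have drifted by more than the threshold.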
include/linux/sched.h +1 −0
@@ -1621,6 +1621,7 @@ extern unsigned int sysctl_sched_features;
 extern unsigned int sysctl_sched_migration_cost;
 extern unsigned int sysctl_sched_nr_migrate;
 extern unsigned int sysctl_sched_shares_ratelimit;
+extern unsigned int sysctl_sched_shares_thresh;
 
 int sched_nr_latency_handler(struct ctl_table *table, int write,
 		struct file *file, void __user *buffer, size_t *length,
kernel/sched.c +25 −20
@@ -817,6 +817,13 @@ const_debug unsigned int sysctl_sched_nr_migrate = 32;
  */
 unsigned int sysctl_sched_shares_ratelimit = 250000;
 
+/*
+ * Inject some fuzzyness into changing the per-cpu group shares
+ * this avoids remote rq-locks at the expense of fairness.
+ * default: 4
+ */
+unsigned int sysctl_sched_shares_thresh = 4;
+
 /*
  * period over which we measure -rt task cpu usage in us.
  * default: 1s
@@ -1453,7 +1460,7 @@ static void __set_se_shares(struct sched_entity *se, unsigned long shares);
  * Calculate and set the cpu's group shares.
  */
 static void
-__update_group_shares_cpu(struct task_group *tg, int cpu,
+update_group_shares_cpu(struct task_group *tg, int cpu,
 			unsigned long sd_shares, unsigned long sd_rq_weight)
 {
 	int boost = 0;
@@ -1485,19 +1492,23 @@ __update_group_shares_cpu(struct task_group *tg, int cpu,
 	 *
 	 */
 	shares = (sd_shares * rq_weight) / (sd_rq_weight + 1);
+	shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES);
 
-	/*
-	 * record the actual number of shares, not the boosted amount.
-	 */
-	tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
-	tg->cfs_rq[cpu]->rq_weight = rq_weight;
-
-	if (shares < MIN_SHARES)
-		shares = MIN_SHARES;
-	else if (shares > MAX_SHARES)
-		shares = MAX_SHARES;
+	if (abs(shares - tg->se[cpu]->load.weight) >
+			sysctl_sched_shares_thresh) {
+		struct rq *rq = cpu_rq(cpu);
+		unsigned long flags;
+
+		spin_lock_irqsave(&rq->lock, flags);
+		/*
+		 * record the actual number of shares, not the boosted amount.
+		 */
+		tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
+		tg->cfs_rq[cpu]->rq_weight = rq_weight;
 
-	__set_se_shares(tg->se[cpu], shares);
+		__set_se_shares(tg->se[cpu], shares);
+		spin_unlock_irqrestore(&rq->lock, flags);
+	}
 }
 
 /*
@@ -1526,14 +1537,8 @@ static int tg_shares_up(struct task_group *tg, void *data)
 	if (!rq_weight)
 		rq_weight = cpus_weight(sd->span) * NICE_0_LOAD;
 
-	for_each_cpu_mask(i, sd->span) {
-		struct rq *rq = cpu_rq(i);
-		unsigned long flags;
-
-		spin_lock_irqsave(&rq->lock, flags);
-		__update_group_shares_cpu(tg, i, shares, rq_weight);
-		spin_unlock_irqrestore(&rq->lock, flags);
-	}
+	for_each_cpu_mask(i, sd->span)
+		update_group_shares_cpu(tg, i, shares, rq_weight);
 
 	return 0;
 }
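
For illustration of the shares formula in the hunk above (numbers invented): with sd_shares = 1024 and per-cpu rq_weights of 3072 and 1024, so sd_rq_weight = 4096, the two cpus compute 1024*3072/4097 ≈ 767 and 1024*1024/4097 ≈ 255 shares. Either value is then applied under the rq lock only if it differs from the currently set load.weight by more than sysctl_sched_shares_thresh.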
kernel/sysctl.c +10 −0
@@ -274,6 +274,16 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "sched_shares_thresh",
+		.data		= &sysctl_sched_shares_thresh,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &zero,
+	},
 	{
 		.ctl_name	= CTL_UNNUMBERED,
 		.procname	= "sched_child_runs_first",
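
The kernel/sysctl.c hunk exposes the new threshold as a runtime knob; since the entry is added to kern_table, it should surface as /proc/sys/kernel/sched_shares_thresh. A minimal user-space sketch of reading and bumping it (illustrative only; writing normally requires root, and error handling is trimmed):

#include <stdio.h>

#define SHARES_THRESH "/proc/sys/kernel/sched_shares_thresh"

int main(void)
{
	unsigned int thresh = 0;
	FILE *f = fopen(SHARES_THRESH, "r");

	if (!f)
		return 1;
	if (fscanf(f, "%u", &thresh) == 1)
		printf("sched_shares_thresh = %u\n", thresh);
	fclose(f);

	/* A larger threshold means fewer remote rq-lock acquisitions,
	 * at the cost of less precise group fairness. */
	f = fopen(SHARES_THRESH, "w");
	if (!f)
		return 1;
	fprintf(f, "%u\n", thresh + 4);
	fclose(f);
	return 0;
}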