
Commit 86a4ac43 authored by Linus Torvalds

Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler fix from Thomas Gleixner:
 "Revert the new NUMA aware placement approach which turned out to
  create more problems than it solved"

* 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  Revert "sched/numa: Delay retrying placement for automatic NUMA balance after wake_affine()"
parents baeda713 789ba280
+1 −56
@@ -1854,7 +1854,6 @@ static int task_numa_migrate(struct task_struct *p)
 static void numa_migrate_preferred(struct task_struct *p)
 {
 	unsigned long interval = HZ;
-	unsigned long numa_migrate_retry;
 
 	/* This task has no NUMA fault statistics yet */
 	if (unlikely(p->numa_preferred_nid == -1 || !p->numa_faults))
@@ -1862,18 +1861,7 @@ static void numa_migrate_preferred(struct task_struct *p)
 
 	/* Periodically retry migrating the task to the preferred node */
 	interval = min(interval, msecs_to_jiffies(p->numa_scan_period) / 16);
-	numa_migrate_retry = jiffies + interval;
-
-	/*
-	 * Check that the new retry threshold is after the current one. If
-	 * the retry is in the future, it implies that wake_affine has
-	 * temporarily asked NUMA balancing to backoff from placement.
-	 */
-	if (numa_migrate_retry > p->numa_migrate_retry)
-		return;
-
-	/* Safe to try placing the task on the preferred node */
-	p->numa_migrate_retry = numa_migrate_retry;
+	p->numa_migrate_retry = jiffies + interval;
 
 	/* Success if task is already running on preferred CPU */
 	if (task_node(p) == p->numa_preferred_nid)
@@ -5922,48 +5910,6 @@ wake_affine_weight(struct sched_domain *sd, struct task_struct *p,
 	return this_eff_load < prev_eff_load ? this_cpu : nr_cpumask_bits;
 }
 
-#ifdef CONFIG_NUMA_BALANCING
-static void
-update_wa_numa_placement(struct task_struct *p, int prev_cpu, int target)
-{
-	unsigned long interval;
-
-	if (!static_branch_likely(&sched_numa_balancing))
-		return;
-
-	/* If balancing has no preference then continue gathering data */
-	if (p->numa_preferred_nid == -1)
-		return;
-
-	/*
-	 * If the wakeup is not affecting locality then it is neutral from
-	 * the perspective of NUMA balacing so continue gathering data.
-	 */
-	if (cpu_to_node(prev_cpu) == cpu_to_node(target))
-		return;
-
-	/*
-	 * Temporarily prevent NUMA balancing trying to place waker/wakee after
-	 * wakee has been moved by wake_affine. This will potentially allow
-	 * related tasks to converge and update their data placement. The
-	 * 4 * numa_scan_period is to allow the two-pass filter to migrate
-	 * hot data to the wakers node.
-	 */
-	interval = max(sysctl_numa_balancing_scan_delay,
-			 p->numa_scan_period << 2);
-	p->numa_migrate_retry = jiffies + msecs_to_jiffies(interval);
-
-	interval = max(sysctl_numa_balancing_scan_delay,
-			 current->numa_scan_period << 2);
-	current->numa_migrate_retry = jiffies + msecs_to_jiffies(interval);
-}
-#else
-static void
-update_wa_numa_placement(struct task_struct *p, int prev_cpu, int target)
-{
-}
-#endif
-
 static int wake_affine(struct sched_domain *sd, struct task_struct *p,
 		       int this_cpu, int prev_cpu, int sync)
 {
@@ -5979,7 +5925,6 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p,
 	if (target == nr_cpumask_bits)
 		return prev_cpu;
 
-	update_wa_numa_placement(p, prev_cpu, target);
 	schedstat_inc(sd->ttwu_move_affine);
 	schedstat_inc(p->se.statistics.nr_wakeups_affine);
 	return target;
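
For reference on the hunk at line 1862 above: with the revert applied, numa_migrate_preferred() re-arms the periodic retry unconditionally, using an interval capped at HZ (one second's worth of jiffies) and derived from the task's scan period. Below is a minimal userspace sketch of that arithmetic only, with HZ = 250, a simplified msecs_to_jiffies() and a sample scan period as illustrative stand-ins for the kernel definitions, not kernel code.

#include <stdio.h>

/* Illustrative stand-ins for the kernel's HZ and msecs_to_jiffies(). */
#define HZ 250UL

static unsigned long msecs_to_jiffies(unsigned long msecs)
{
	return msecs * HZ / 1000;	/* simplified: ignores rounding */
}

static unsigned long min_ul(unsigned long a, unsigned long b)
{
	return a < b ? a : b;
}

int main(void)
{
	unsigned long jiffies = 100000;		/* pretend current time */
	unsigned long numa_scan_period = 1000;	/* ms, sample value */

	/* Same computation as numa_migrate_preferred() after the revert. */
	unsigned long interval = min_ul(HZ, msecs_to_jiffies(numa_scan_period) / 16);
	unsigned long numa_migrate_retry = jiffies + interval;

	printf("interval = %lu jiffies (~%lu ms), next retry at %lu\n",
	       interval, interval * 1000 / HZ, numa_migrate_retry);
	return 0;
}

With those sample numbers the retry is re-armed roughly every 60 ms, unconditionally; the reverted code instead skipped the re-arm when it judged that wake_affine() had asked NUMA balancing to back off, per the removed comment above.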