Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit f7f4e7fc authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler updates from Ingo Molnar:

 - power-aware scheduling improvements (Patrick Bellasi)

 - NUMA balancing improvements (Mel Gorman)

 - vCPU scheduling fixes (Rohit Jain)

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/fair: Update util_est before updating schedutil
  sched/cpufreq: Modify aggregate utilization to always include blocked FAIR utilization
  sched/deadline/Documentation: Add overrun signal and GRUB-PA documentation
  sched/core: Distinguish between idle_cpu() calls based on desired effect, introduce available_idle_cpu()
  sched/wait: Include <linux/wait.h> in <linux/swait.h>
  sched/numa: Stagger NUMA balancing scan periods for new threads
  sched/core: Don't schedule threads on pre-empted vCPUs
  sched/fair: Avoid calling sync_entity_load_avg() unnecessarily
  sched/fair: Rearrange select_task_rq_fair() to optimize it
parents d9b446e2 2539fc82
Loading
Loading
Loading
Loading
+24 −1
Original line number Diff line number Diff line
@@ -49,7 +49,7 @@ CONTENTS
2.1 Main algorithm
------------------

 SCHED_DEADLINE uses three parameters, named "runtime", "period", and
 SCHED_DEADLINE [18] uses three parameters, named "runtime", "period", and
 "deadline", to schedule tasks. A SCHED_DEADLINE task should receive
 "runtime" microseconds of execution time every "period" microseconds, and
 these "runtime" microseconds are available within "deadline" microseconds
@@ -117,6 +117,10 @@ CONTENTS
         scheduling deadline = scheduling deadline + period
         remaining runtime = remaining runtime + runtime

 The SCHED_FLAG_DL_OVERRUN flag in sched_attr's sched_flags field allows a task
 to get informed about runtime overruns through the delivery of SIGXCPU
 signals.


2.2 Bandwidth reclaiming
------------------------
@@ -279,6 +283,19 @@ CONTENTS
    running_bw is incremented.


2.3 Energy-aware scheduling
------------------------

 When cpufreq's schedutil governor is selected, SCHED_DEADLINE implements the
 GRUB-PA [19] algorithm, reducing the CPU operating frequency to the minimum
 value that still allows to meet the deadlines. This behavior is currently
 implemented only for ARM architectures.

 A particular care must be taken in case the time needed for changing frequency
 is of the same order of magnitude of the reservation period. In such cases,
 setting a fixed CPU frequency results in a lower amount of deadline misses.


3. Scheduling Real-Time Tasks
=============================

@@ -505,6 +522,12 @@ CONTENTS
  17 - L. Abeni, G. Lipari, A. Parri, Y. Sun, Multicore CPU reclaiming: parallel
       or sequential?. In Proceedings of the 31st Annual ACM Symposium on Applied
       Computing, 2016.
  18 - J. Lelli, C. Scordino, L. Abeni, D. Faggioli, Deadline scheduling in the
       Linux kernel, Software: Practice and Experience, 46(6): 821-839, June
       2016.
  19 - C. Scordino, L. Abeni, J. Lelli, Energy-Aware Real-Time Scheduling in
       the Linux Kernel, 33rd ACM/SIGAPP Symposium On Applied Computing (SAC
       2018), Pau, France, April 2018.


4. Bandwidth management
+1 −0
Original line number Diff line number Diff line
@@ -1512,6 +1512,7 @@ static inline int task_nice(const struct task_struct *p)
extern int can_nice(const struct task_struct *p, const int nice);
extern int task_curr(const struct task_struct *p);
extern int idle_cpu(int cpu);
extern int available_idle_cpu(int cpu);
extern int sched_setscheduler(struct task_struct *, int, const struct sched_param *);
extern int sched_setscheduler_nocheck(struct task_struct *, int, const struct sched_param *);
extern int sched_setattr(struct task_struct *, const struct sched_attr *);
+1 −0
Original line number Diff line number Diff line
@@ -5,6 +5,7 @@
#include <linux/list.h>
#include <linux/stddef.h>
#include <linux/spinlock.h>
#include <linux/wait.h>
#include <asm/current.h>

/*
+18 −21
Original line number Diff line number Diff line
@@ -2194,27 +2194,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
	INIT_HLIST_HEAD(&p->preempt_notifiers);
#endif

#ifdef CONFIG_NUMA_BALANCING
	if (p->mm && atomic_read(&p->mm->mm_users) == 1) {
		p->mm->numa_next_scan = jiffies + msecs_to_jiffies(sysctl_numa_balancing_scan_delay);
		p->mm->numa_scan_seq = 0;
	}

	if (clone_flags & CLONE_VM)
		p->numa_preferred_nid = current->numa_preferred_nid;
	else
		p->numa_preferred_nid = -1;

	p->node_stamp = 0ULL;
	p->numa_scan_seq = p->mm ? p->mm->numa_scan_seq : 0;
	p->numa_scan_period = sysctl_numa_balancing_scan_delay;
	p->numa_work.next = &p->numa_work;
	p->numa_faults = NULL;
	p->last_task_numa_placement = 0;
	p->last_sum_exec_runtime = 0;

	p->numa_group = NULL;
#endif /* CONFIG_NUMA_BALANCING */
	init_numa_balancing(clone_flags, p);
}

DEFINE_STATIC_KEY_FALSE(sched_numa_balancing);
@@ -4049,6 +4029,23 @@ int idle_cpu(int cpu)
	return 1;
}

/**
 * available_idle_cpu - is a given CPU idle for enqueuing work.
 * @cpu: the CPU in question.
 *
 * Return: 1 if the CPU is currently idle. 0 otherwise.
 */
int available_idle_cpu(int cpu)
{
	if (!idle_cpu(cpu))
		return 0;

	if (vcpu_is_preempted(cpu))
		return 0;

	return 1;
}

/**
 * idle_task - return the idle task for a given CPU.
 * @cpu: the processor in question.
+8 −9
Original line number Diff line number Diff line
@@ -183,22 +183,21 @@ static void sugov_get_util(struct sugov_cpu *sg_cpu)
static unsigned long sugov_aggregate_util(struct sugov_cpu *sg_cpu)
{
	struct rq *rq = cpu_rq(sg_cpu->cpu);
	unsigned long util;

	if (rq->rt.rt_nr_running) {
		util = sg_cpu->max;
	} else {
		util = sg_cpu->util_dl;
		if (rq->cfs.h_nr_running)
			util += sg_cpu->util_cfs;
	}
	if (rq->rt.rt_nr_running)
		return sg_cpu->max;

	/*
	 * Utilization required by DEADLINE must always be granted while, for
	 * FAIR, we use blocked utilization of IDLE CPUs as a mechanism to
	 * gracefully reduce the frequency when no tasks show up for longer
	 * periods of time.
	 *
	 * Ideally we would like to set util_dl as min/guaranteed freq and
	 * util_cfs + util_dl as requested freq. However, cpufreq is not yet
	 * ready for such an interface. So, we only do the latter for now.
	 */
	return min(util, sg_cpu->max);
	return min(sg_cpu->max, (sg_cpu->util_dl + sg_cpu->util_cfs));
}

static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time, unsigned int flags)
Loading