
Commit af79ad2b authored by Linus Torvalds

Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler changes from Ingo Molnar:
 "The main changes are:

   - irqtime accounting cleanups and enhancements. (Frederic Weisbecker)

   - schedstat debugging enhancements, make it more broadly runtime
     available. (Josh Poimboeuf)

   - More work on asymmetric topology/capacity scheduling. (Morten
     Rasmussen)

   - sched/wait fixes and cleanups. (Oleg Nesterov)

   - PELT (per entity load tracking) improvements. (Peter Zijlstra)

   - Rewrite and enhance select_idle_siblings(). (Peter Zijlstra)

   - sched/numa enhancements/fixes (Rik van Riel)

   - sched/cputime scalability improvements (Stanislaw Gruszka)

   - Load calculation arithmetics fixes. (Dietmar Eggemann)

   - sched/deadline enhancements (Tommaso Cucinotta)

   - Fix utilization accounting when switching to the SCHED_NORMAL
     policy. (Vincent Guittot)

   - ... plus misc cleanups and enhancements"

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (64 commits)
  sched/irqtime: Consolidate irqtime flushing code
  sched/irqtime: Consolidate accounting synchronization with u64_stats API
  u64_stats: Introduce IRQs disabled helpers
  sched/irqtime: Remove needless IRQs disablement on kcpustat update
  sched/irqtime: No need for preempt-safe accessors
  sched/fair: Fix min_vruntime tracking
  sched/debug: Add SCHED_WARN_ON()
  sched/core: Fix set_user_nice()
  sched/fair: Introduce set_curr_task() helper
  sched/core, ia64: Rename set_curr_task()
  sched/core: Fix incorrect utilization accounting when switching to fair class
  sched/core: Optimize SCHED_SMT
  sched/core: Rewrite and improve select_idle_siblings()
  sched/core: Replace sd_busy/nr_busy_cpus with sched_domain_shared
  sched/core: Introduce 'struct sched_domain_shared'
  sched/core: Restructure destroy_sched_domain()
  sched/core: Remove unused @cpu argument from destroy_sched_domain*()
  sched/wait: Introduce init_wait_entry()
  sched/wait: Avoid abort_exclusive_wait() in __wait_on_bit_lock()
  sched/wait: Avoid abort_exclusive_wait() in ___wait_event()
  ...
parents e606d81d 447976ef
Documentation/scheduler/sched-deadline.txt  +18 −0
@@ -16,6 +16,7 @@ CONTENTS
   4.1 System-wide settings
   4.2 Task interface
   4.3 Default behavior
+  4.4 Behavior of sched_yield()
 5. Tasks CPU affinity
   5.1 SCHED_DEADLINE and cpusets HOWTO
 6. Future plans
@@ -426,6 +427,23 @@ CONTENTS
 Finally, notice that in order not to jeopardize the admission control a
 -deadline task cannot fork.
 
+
+4.4 Behavior of sched_yield()
+-----------------------------
+
+ When a SCHED_DEADLINE task calls sched_yield(), it gives up its
+ remaining runtime and is immediately throttled, until the next
+ period, when its runtime will be replenished (a special flag
+ dl_yielded is set and used to handle correctly throttling and runtime
+ replenishment after a call to sched_yield()).
+
+ This behavior of sched_yield() allows the task to wake up exactly at
+ the beginning of the next period. Also, this may be useful in the
+ future with bandwidth reclaiming mechanisms, where sched_yield() will
+ make the leftover runtime available for reclamation by other
+ SCHED_DEADLINE tasks.
+
+
 5. Tasks CPU affinity
 =====================
 
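Note: the documented semantics above lend themselves to a small userspace demo. The following is a minimal sketch (not from the patch) of a periodic SCHED_DEADLINE task that finishes early and yields its leftover runtime, waking at the start of its next period. sched_setattr() has no glibc wrapper, so it is invoked via syscall(); struct sched_attr mirrors the kernel UAPI layout, and do_work() is a hypothetical placeholder.

#define _GNU_SOURCE
#include <sched.h>		/* sched_yield() */
#include <stdint.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef SCHED_DEADLINE
#define SCHED_DEADLINE	6	/* value from the kernel UAPI headers */
#endif

/* Mirrors the kernel's struct sched_attr; glibc provides no wrapper. */
struct sched_attr {
	uint32_t size;
	uint32_t sched_policy;
	uint64_t sched_flags;
	int32_t  sched_nice;
	uint32_t sched_priority;
	uint64_t sched_runtime;		/* all times in nanoseconds */
	uint64_t sched_deadline;
	uint64_t sched_period;
};

static void do_work(void)
{
	/* hypothetical placeholder for the periodic job */
}

int main(void)
{
	struct sched_attr attr = {
		.size           = sizeof(attr),
		.sched_policy   = SCHED_DEADLINE,
		.sched_runtime  = 10 * 1000 * 1000,	/*  10 ms budget */
		.sched_deadline = 30 * 1000 * 1000,	/*  30 ms        */
		.sched_period   = 100 * 1000 * 1000,	/* 100 ms        */
	};

	/* Needs root/CAP_SYS_NICE; admission control may also refuse. */
	if (syscall(SYS_sched_setattr, 0, &attr, 0)) {
		perror("sched_setattr");
		return 1;
	}

	for (;;) {
		do_work();
		/*
		 * Give back the unused runtime; the dl_yielded handling
		 * described above throttles the task until the start of
		 * the next period, where its runtime is replenished.
		 */
		sched_yield();
	}
}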
arch/ia64/kernel/mca.c  +5 −5
@@ -986,7 +986,7 @@ ia64_mca_modify_original_stack(struct pt_regs *regs,
 	int cpu = smp_processor_id();
 
 	previous_current = curr_task(cpu);
-	set_curr_task(cpu, current);
+	ia64_set_curr_task(cpu, current);
 	if ((p = strchr(current->comm, ' ')))
 		*p = '\0';
 
@@ -1360,14 +1360,14 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
 				cpumask_clear_cpu(i, &mca_cpu);	/* wake next cpu */
 				while (monarch_cpu != -1)
 					cpu_relax();	/* spin until last cpu leaves */
-				set_curr_task(cpu, previous_current);
+				ia64_set_curr_task(cpu, previous_current);
 				ia64_mc_info.imi_rendez_checkin[cpu]
 						= IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
 				return;
 			}
 		}
 	}
-	set_curr_task(cpu, previous_current);
+	ia64_set_curr_task(cpu, previous_current);
 	ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
 	monarch_cpu = -1;	/* This frees the slaves and previous monarchs */
 }
@@ -1729,7 +1729,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
 		NOTIFY_INIT(DIE_INIT_SLAVE_LEAVE, regs, (long)&nd, 1);
 
 		mprintk("Slave on cpu %d returning to normal service.\n", cpu);
-		set_curr_task(cpu, previous_current);
+		ia64_set_curr_task(cpu, previous_current);
 		ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
 		atomic_dec(&slaves);
 		return;
@@ -1756,7 +1756,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
 
 	mprintk("\nINIT dump complete.  Monarch on cpu %d returning to normal service.\n", cpu);
 	atomic_dec(&monarchs);
-	set_curr_task(cpu, previous_current);
+	ia64_set_curr_task(cpu, previous_current);
 	monarch_cpu = -1;
 	return;
 }
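Two shortlog entries above explain this ia64 churn: "sched/core, ia64: Rename set_curr_task()" frees the generic name so that "sched/fair: Introduce set_curr_task() helper" can claim it in the scheduler core. A minimal sketch of that helper (not part of this file's diff; the merged body may differ in detail), assuming the per-class set_curr_task callback in struct sched_class:

/*
 * Sketch of the new scheduler-core helper: after a running task
 * changes class or priority, re-run its class's set_curr_task()
 * callback so per-class bookkeeping is refreshed.
 */
static inline void set_curr_task(struct rq *rq, struct task_struct *curr)
{
	curr->sched_class->set_curr_task(rq);
}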
arch/x86/kernel/smpboot.c  +30 −16
@@ -471,7 +471,7 @@ static bool match_die(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
 	return false;
 }
 
-static struct sched_domain_topology_level numa_inside_package_topology[] = {
+static struct sched_domain_topology_level x86_numa_in_package_topology[] = {
 #ifdef CONFIG_SCHED_SMT
 	{ cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
 #endif
@@ -480,22 +480,23 @@ static struct sched_domain_topology_level numa_inside_package_topology[] = {
 #endif
 	{ NULL, },
 };
 
+static struct sched_domain_topology_level x86_topology[] = {
+#ifdef CONFIG_SCHED_SMT
+	{ cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
+#endif
+#ifdef CONFIG_SCHED_MC
+	{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
+#endif
+	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
+	{ NULL, },
+};
+
 /*
- * set_sched_topology() sets the topology internal to a CPU.  The
- * NUMA topologies are layered on top of it to build the full
- * system topology.
- *
- * If NUMA nodes are observed to occur within a CPU package, this
- * function should be called.  It forces the sched domain code to
- * only use the SMT level for the CPU portion of the topology.
- * This essentially falls back to relying on NUMA information
- * from the SRAT table to describe the entire system topology
- * (except for hyperthreads).
+ * Set if a package/die has multiple NUMA nodes inside.
+ * AMD Magny-Cours and Intel Cluster-on-Die have this.
  */
-static void primarily_use_numa_for_topology(void)
-{
-	set_sched_topology(numa_inside_package_topology);
-}
+static bool x86_has_numa_in_package;
 
 void set_cpu_sibling_map(int cpu)
 {
@@ -558,7 +559,7 @@ void set_cpu_sibling_map(int cpu)
 				c->booted_cores = cpu_data(i).booted_cores;
 		}
 		if (match_die(c, o) && !topology_same_node(c, o))
-			primarily_use_numa_for_topology();
+			x86_has_numa_in_package = true;
 	}
 
 	threads = cpumask_weight(topology_sibling_cpumask(cpu));
@@ -1304,6 +1305,16 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
 		zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
 		zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL);
 	}
+
+	/*
+	 * Set 'default' x86 topology, this matches default_topology() in that
+	 * it has NUMA nodes as a topology level. See also
+	 * native_smp_cpus_done().
+	 *
+	 * Must be done before set_cpu_sibling_map() is run.
+	 */
+	set_sched_topology(x86_topology);
+
 	set_cpu_sibling_map(0);
 
 	switch (smp_sanity_check(max_cpus)) {
@@ -1370,6 +1381,9 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
 {
 	pr_debug("Boot done\n");
 
+	if (x86_has_numa_in_package)
+		set_sched_topology(x86_numa_in_package_topology);
+
 	nmi_selftest();
 	impress_friends();
 	setup_ioapic_dest();
include/linux/kernel.h  +3 −6
@@ -259,17 +259,14 @@ static inline void might_fault(void) { }
 extern struct atomic_notifier_head panic_notifier_list;
 extern long (*panic_blink)(int state);
 __printf(1, 2)
-void panic(const char *fmt, ...)
-	__noreturn __cold;
+void panic(const char *fmt, ...) __noreturn __cold;
 void nmi_panic(struct pt_regs *regs, const char *msg);
 extern void oops_enter(void);
 extern void oops_exit(void);
 void print_oops_end_marker(void);
 extern int oops_may_print(void);
-void do_exit(long error_code)
-	__noreturn;
-void complete_and_exit(struct completion *, long)
-	__noreturn;
+void do_exit(long error_code) __noreturn;
+void complete_and_exit(struct completion *, long) __noreturn;
 
 /* Internal, do not use. */
 int __must_check _kstrtoul(const char *s, unsigned int base, unsigned long *res);
include/linux/sched.h  +28 −2
@@ -448,6 +448,8 @@ static inline void io_schedule(void)
 	io_schedule_timeout(MAX_SCHEDULE_TIMEOUT);
 }
 
+void __noreturn do_task_dead(void);
+
 struct nsproxy;
 struct user_namespace;
 
@@ -1022,7 +1024,8 @@ extern void wake_up_q(struct wake_q_head *head);
 #define SD_BALANCE_FORK		0x0008	/* Balance on fork, clone */
 #define SD_BALANCE_WAKE		0x0010  /* Balance on wakeup */
 #define SD_WAKE_AFFINE		0x0020	/* Wake task to waking CPU */
-#define SD_SHARE_CPUCAPACITY	0x0080	/* Domain members share cpu power */
+#define SD_ASYM_CPUCAPACITY	0x0040  /* Groups have different max cpu capacities */
+#define SD_SHARE_CPUCAPACITY	0x0080	/* Domain members share cpu capacity */
 #define SD_SHARE_POWERDOMAIN	0x0100	/* Domain members share power domain */
 #define SD_SHARE_PKG_RESOURCES	0x0200	/* Domain members share cpu pkg resources */
 #define SD_SERIALIZE		0x0400	/* Only a single load balancing instance */
@@ -1064,6 +1067,12 @@ extern int sched_domain_level_max;
 
 struct sched_group;
 
+struct sched_domain_shared {
+	atomic_t	ref;
+	atomic_t	nr_busy_cpus;
+	int		has_idle_cores;
+};
+
 struct sched_domain {
 	/* These fields must be setup */
 	struct sched_domain *parent;	/* top domain must be null terminated */
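The new struct sched_domain_shared gives every CPU under one last-level cache a single shared, reference-counted blob of state; has_idle_cores is the hint consulted by the select_idle_siblings() rewrite in this merge. A sketch of the intended access pattern (assuming the per-CPU sd_llc_shared pointer the scheduler keeps for the LLC domain; details may differ from the merged code):

/*
 * Sketch: has_idle_cores is only a heuristic hint, so racy
 * WRITE_ONCE()/READ_ONCE() access under RCU is good enough.
 */
static inline void set_idle_cores(int cpu, int val)
{
	struct sched_domain_shared *sds;

	sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
	if (sds)
		WRITE_ONCE(sds->has_idle_cores, val);
}

static inline bool test_idle_cores(int cpu, bool def)
{
	struct sched_domain_shared *sds;

	sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
	if (sds)
		return READ_ONCE(sds->has_idle_cores);

	return def;
}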
@@ -1094,6 +1103,8 @@ struct sched_domain {
 	u64 max_newidle_lb_cost;
 	unsigned long next_decay_max_lb_cost;
 
+	u64 avg_scan_cost;		/* select_idle_sibling */
+
 #ifdef CONFIG_SCHEDSTATS
 	/* load_balance() stats */
 	unsigned int lb_count[CPU_MAX_IDLE_TYPES];
@@ -1132,6 +1143,7 @@ struct sched_domain {
 		void *private;		/* used during construction */
 		struct rcu_head rcu;	/* used during destruction */
 	};
+	struct sched_domain_shared *shared;
 
 	unsigned int span_weight;
 	/*
@@ -1165,6 +1177,7 @@ typedef int (*sched_domain_flags_f)(void);
 
 struct sd_data {
 	struct sched_domain **__percpu sd;
+	struct sched_domain_shared **__percpu sds;
 	struct sched_group **__percpu sg;
 	struct sched_group_capacity **__percpu sgc;
 };
@@ -2568,7 +2581,7 @@ static inline bool is_idle_task(const struct task_struct *p)
 	return p->pid == 0;
 }
 extern struct task_struct *curr_task(int cpu);
-extern void set_curr_task(int cpu, struct task_struct *p);
+extern void ia64_set_curr_task(int cpu, struct task_struct *p);
 
 void yield(void);
 
@@ -3206,7 +3219,11 @@ static inline int signal_pending_state(long state, struct task_struct *p)
  * cond_resched_lock() will drop the spinlock before scheduling,
  * cond_resched_softirq() will enable bhs before scheduling.
  */
+#ifndef CONFIG_PREEMPT
 extern int _cond_resched(void);
+#else
+static inline int _cond_resched(void) { return 0; }
+#endif
 
 #define cond_resched() ({			\
 	___might_sleep(__FILE__, __LINE__, 0);	\
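The #ifndef CONFIG_PREEMPT split encodes a simple fact: on a fully preemptible kernel the scheduler can already interrupt the task anywhere, so an explicit reschedule point adds nothing and _cond_resched() collapses to a no-op stub, leaving only the might-sleep debug check inside cond_resched(). The usual call pattern in a long kernel loop looks like this sketch (struct item and process_item() are hypothetical stand-ins):

static void process_table(struct item *table, unsigned long nents)
{
	unsigned long i;

	for (i = 0; i < nents; i++) {
		process_item(&table[i]);	/* hypothetical per-item work */
		cond_resched();			/* voluntary preemption point */
	}
}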
@@ -3236,6 +3253,15 @@ static inline void cond_resched_rcu(void)
 #endif
 }
 
+static inline unsigned long get_preempt_disable_ip(struct task_struct *p)
+{
+#ifdef CONFIG_DEBUG_PREEMPT
+	return p->preempt_disable_ip;
+#else
+	return 0;
+#endif
+}
+
 /*
  * Does a critical section need to be broken due to another
  * task waiting?: (technically does not depend on CONFIG_PREEMPT,
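get_preempt_disable_ip() follows the usual pattern for CONFIG-gated fields: callers stay #ifdef-free and the !CONFIG_DEBUG_PREEMPT build constant-folds the call away. A sketch of the kind of diagnostic it enables (the scheduler's might-sleep/scheduling-while-atomic reports; exact call sites in the merge may differ):

static void report_preempt_disable_ip(struct task_struct *p)
{
	unsigned long ip = get_preempt_disable_ip(p);

	/* %pS prints symbol+offset for a kernel code address */
	if (ip)
		pr_err("preempt disabled at: %pS\n", (void *)ip);
}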