Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip (49d2953c) · Commits · e / devices / android_kernel_oneplus_sm7250

arch/x86/include/asm/mwait.h

+8 −0

Original line number	Diff line number	Diff line
		@@ -30,6 +30,14 @@ static inline void __mwait(unsigned long eax, unsigned long ecx)
		:: "a" (eax), "c" (ecx));
		}

		/*
		* Enable interrupts and enter MWAIT from one asm statement.
		*
		* @eax: value loaded into EAX for the MWAIT instruction
		* @ecx: value loaded into ECX for the MWAIT instruction
		*
		* The "sti" and the hand-encoded MWAIT opcode are emitted back to back
		* in the same asm statement, so the compiler cannot place anything
		* between them.
		* NOTE(review): presumably this relies on the STI interrupt shadow so
		* that no interrupt is taken between enabling interrupts and starting
		* the wait -- confirm against the Intel SDM.
		*/
		static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
		{
		/*
		* Interrupts are turned on by the asm below rather than by
		* local_irq_enable(), so the irq-tracing hook is called by hand
		* here, before the "sti" executes.
		*/
		trace_hardirqs_on();
		/* "mwait %eax, %ecx;" */
		asm volatile("sti; .byte 0x0f, 0x01, 0xc9;"
		:: "a" (eax), "c" (ecx));
		}

		/*
		* This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
		* which can obviate IPI to trigger checking of need_resched.

arch/x86/kernel/process.c

+51 −0

Original line number	Diff line number	Diff line
		@@ -24,6 +24,7 @@
		#include <asm/syscalls.h>
		#include <asm/idle.h>
		#include <asm/uaccess.h>
		#include <asm/mwait.h>
		#include <asm/i387.h>
		#include <asm/fpu-internal.h>
		#include <asm/debugreg.h>
		@@ -399,6 +400,53 @@ static void amd_e400_idle(void)
		default_idle();
		}

		/*
		* Intel Core2 and older machines prefer MWAIT over HALT for C1.
		* We can't rely on cpuidle installing MWAIT, because it will not load
		* on systems that support only C1 -- so the boot default must be MWAIT.
		*
		* Some AMD machines are the opposite, they depend on using HALT.
		*
		* So for default C1, which is used during boot until cpuidle loads,
		* use MWAIT-C1 on Intel HW that has it, else use HALT.
		*/
		static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c)
		{
			/*
			 * Returns 1 only when both conditions hold, 0 otherwise:
			 *  - the CPU vendor is Intel, and
			 *  - the CPU advertises the MWAIT feature bit.
			 * The vendor test comes first so the feature bit is only
			 * consulted on Intel parts.
			 */
			const int vendor_is_intel = (c->x86_vendor == X86_VENDOR_INTEL);

			return vendor_is_intel && cpu_has(c, X86_FEATURE_MWAIT);
		}

		/*
		* MONITOR/MWAIT with no hints, used for the default C1 state.
		* This invokes MWAIT with interrupts enabled and no flags,
		* which is backwards compatible with the original MWAIT implementation.
		*/

		static void mwait_idle(void)
		{
		/*
		* Every path through this function enables interrupts exactly once:
		* either via the "sti" inside __sti_mwait() or via local_irq_enable().
		* NOTE(review): presumably the caller enters with interrupts
		* disabled -- confirm against the idle loop.
		*/
		if (!current_set_polling_and_test()) {
		/*
		* On CPUs flagged with X86_BUG_CLFLUSH_MONITOR, flush the cache
		* line holding the thread-info flags, fenced by full barriers
		* on both sides, before arming the monitor on that same address.
		*/
		if (this_cpu_has(X86_BUG_CLFLUSH_MONITOR)) {
		smp_mb(); /* quirk */
		clflush((void *)&current_thread_info()->flags);
		smp_mb(); /* quirk */
		}

		/* Arm the monitor on the thread-info flags word. */
		__monitor((void *)&current_thread_info()->flags, 0, 0);
		/*
		* Re-check need_resched() after arming the monitor: only enter
		* MWAIT (eax = 0, ecx = 0, i.e. no hints) if no reschedule is
		* pending; otherwise just enable interrupts and fall through.
		*/
		if (!need_resched())
		__sti_mwait(0, 0);
		else
		local_irq_enable();
		} else {
		/*
		* current_set_polling_and_test() returned non-zero: skip the
		* wait entirely and only enable interrupts.
		*/
		local_irq_enable();
		}
		/* Runs on every path, after interrupts have been enabled. */
		__current_clr_polling();
		}

		void select_idle_routine(const struct cpuinfo_x86 *c)
		{
		#ifdef CONFIG_SMP
		@@ -412,6 +460,9 @@ void select_idle_routine(const struct cpuinfo_x86 *c)
		/* E400: APIC timer interrupt does not wake up CPU from C1e */
		pr_info("using AMD E400 aware idle routine\n");
		x86_idle = amd_e400_idle;
		} else if (prefer_mwait_c1_over_halt(c)) {
		pr_info("using mwait in idle threads\n");
		x86_idle = mwait_idle;
		} else
		x86_idle = default_idle;
		}

include/linux/irq_work.h

+2 −1

Original line number	Diff line number	Diff line
		@@ -38,16 +38,17 @@ bool irq_work_queue(struct irq_work *work);
		bool irq_work_queue_on(struct irq_work *work, int cpu);
		#endif

		void irq_work_run(void);
		void irq_work_tick(void);
		void irq_work_sync(struct irq_work *work);

		#ifdef CONFIG_IRQ_WORK
		#include <asm/irq_work.h>

		void irq_work_run(void);
		bool irq_work_needs_cpu(void);
		#else
		static inline bool irq_work_needs_cpu(void) { return false; }
		static inline void irq_work_run(void) { }
		#endif

		#endif /* _LINUX_IRQ_WORK_H */

include/linux/sched.h

+17 −4

Original line number	Diff line number	Diff line
		@@ -1123,15 +1123,28 @@ struct load_weight {
		};

		struct sched_avg {
		u64 last_runnable_update;
		s64 decay_count;
		/*
		* utilization_avg_contrib describes the amount of time that a
		* sched_entity is running on a CPU. It is based on running_avg_sum
		* and is scaled in the range [0..SCHED_LOAD_SCALE].
		* load_avg_contrib describes the amount of time that a sched_entity
		* is runnable on a rq. It is based on both runnable_avg_sum and the
		* weight of the task.
		*/
		unsigned long load_avg_contrib, utilization_avg_contrib;
		/*
		* These sums represent an infinite geometric series and so are bound
		* above by 1024/(1-y). Thus we only need a u32 to store them for all
		* choices of y < 1-2^(-32)*1024.
		* running_avg_sum reflects the time that the sched_entity is
		* effectively running on the CPU.
		* runnable_avg_sum represents the amount of time a sched_entity is on
		* a runqueue which includes the running time that is monitored by
		* running_avg_sum.
		*/
		u32 runnable_avg_sum, runnable_avg_period;
		u64 last_runnable_update;
		s64 decay_count;
		unsigned long load_avg_contrib;
		u32 runnable_avg_sum, avg_period, running_avg_sum;
		};

		#ifdef CONFIG_SCHEDSTATS

kernel/sched/core.c

+50 −46

Original line number	Diff line number	Diff line
		@@ -689,6 +689,23 @@ static inline bool got_nohz_idle_kick(void)
		#ifdef CONFIG_NO_HZ_FULL
		bool sched_can_stop_tick(void)
		{
		/*
		* FIFO realtime policy runs the highest priority task. Other runnable
		* tasks are of a lower priority. The scheduler tick does nothing.
		*/
		if (current->policy == SCHED_FIFO)
		return true;

		/*
		* Round-robin realtime tasks time slice with other tasks at the same
		* realtime priority. Is this task the only one at this priority?
		*/
		if (current->policy == SCHED_RR) {
		struct sched_rt_entity *rt_se = &current->rt;

		return rt_se->run_list.prev == rt_se->run_list.next;
		}

		/*
		* More than one running task need preemption.
		* nr_running update is assumed to be visible
		@@ -5335,37 +5352,14 @@ static int sched_cpu_active(struct notifier_block *nfb,
		static int sched_cpu_inactive(struct notifier_block *nfb,
		unsigned long action, void *hcpu)
		{
		unsigned long flags;
		long cpu = (long)hcpu;
		struct dl_bw *dl_b;

		switch (action & ~CPU_TASKS_FROZEN) {
		case CPU_DOWN_PREPARE:
		set_cpu_active(cpu, false);

		/* explicitly allow suspend */
		if (!(action & CPU_TASKS_FROZEN)) {
		bool overflow;
		int cpus;

		rcu_read_lock_sched();
		dl_b = dl_bw_of(cpu);

		raw_spin_lock_irqsave(&dl_b->lock, flags);
		cpus = dl_bw_cpus(cpu);
		overflow = __dl_overflow(dl_b, cpus, 0, 0);
		raw_spin_unlock_irqrestore(&dl_b->lock, flags);

		rcu_read_unlock_sched();

		if (overflow)
		return notifier_from_errno(-EBUSY);
		}
		set_cpu_active((long)hcpu, false);
		return NOTIFY_OK;
		}

		default:
		return NOTIFY_DONE;
		}
		}

		static int __init migration_init(void)
		{
		@@ -5445,17 +5439,6 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
		break;
		}

		/*
		* Even though we initialize ->capacity to something semi-sane,
		* we leave capacity_orig unset. This allows us to detect if
		* domain iteration is still funny without causing /0 traps.
		*/
		if (!group->sgc->capacity_orig) {
		printk(KERN_CONT "\n");
		printk(KERN_ERR "ERROR: domain->cpu_capacity not set\n");
		break;
		}

		if (!cpumask_weight(sched_group_cpus(group))) {
		printk(KERN_CONT "\n");
		printk(KERN_ERR "ERROR: empty group\n");
		@@ -5939,7 +5922,6 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
		* die on a /0 trap.
		*/
		sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sg_span);
		sg->sgc->capacity_orig = sg->sgc->capacity;

		/*
		* Make sure the first group of this domain contains the
		@@ -6250,6 +6232,7 @@ sd_init(struct sched_domain_topology_level *tl, int cpu)
		*/

		if (sd->flags & SD_SHARE_CPUCAPACITY) {
		sd->flags |= SD_PREFER_SIBLING;
		sd->imbalance_pct = 110;
		sd->smt_gain = 1178; /* ~15% */

		@@ -7015,7 +6998,6 @@ static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action,
		*/

		case CPU_ONLINE:
		case CPU_DOWN_FAILED:
		cpuset_update_active_cpus(true);
		break;
		default:
		@@ -7027,8 +7009,30 @@ static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action,
		static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action,
		void *hcpu)
		{
		switch (action) {
		unsigned long flags;
		long cpu = (long)hcpu;
		struct dl_bw *dl_b;

		switch (action & ~CPU_TASKS_FROZEN) {
		case CPU_DOWN_PREPARE:
		/* explicitly allow suspend */
		if (!(action & CPU_TASKS_FROZEN)) {
		bool overflow;
		int cpus;

		rcu_read_lock_sched();
		dl_b = dl_bw_of(cpu);

		raw_spin_lock_irqsave(&dl_b->lock, flags);
		cpus = dl_bw_cpus(cpu);
		overflow = __dl_overflow(dl_b, cpus, 0, 0);
		raw_spin_unlock_irqrestore(&dl_b->lock, flags);

		rcu_read_unlock_sched();

		if (overflow)
		return notifier_from_errno(-EBUSY);
		}
		cpuset_update_active_cpus(false);
		break;
		case CPU_DOWN_PREPARE_FROZEN:
		@@ -7173,8 +7177,8 @@ void __init sched_init(void)
		rq->calc_load_active = 0;
		rq->calc_load_update = jiffies + LOAD_FREQ;
		init_cfs_rq(&rq->cfs);
		init_rt_rq(&rq->rt, rq);
		init_dl_rq(&rq->dl, rq);
		init_rt_rq(&rq->rt);
		init_dl_rq(&rq->dl);
		#ifdef CONFIG_FAIR_GROUP_SCHED
		root_task_group.shares = ROOT_TASK_GROUP_LOAD;
		INIT_LIST_HEAD(&rq->leaf_cfs_rq_list);
		@@ -7214,7 +7218,7 @@ void __init sched_init(void)
		#ifdef CONFIG_SMP
		rq->sd = NULL;
		rq->rd = NULL;
		rq->cpu_capacity = SCHED_CAPACITY_SCALE;
		rq->cpu_capacity = rq->cpu_capacity_orig = SCHED_CAPACITY_SCALE;
		rq->post_schedule = 0;
		rq->active_balance = 0;
		rq->next_balance = jiffies;
		@@ -7813,7 +7817,7 @@ static int sched_rt_global_constraints(void)
		}
		#endif /* CONFIG_RT_GROUP_SCHED */

		static int sched_dl_global_constraints(void)
		static int sched_dl_global_validate(void)
		{
		u64 runtime = global_rt_runtime();
		u64 period = global_rt_period();
		@@ -7914,11 +7918,11 @@ int sched_rt_handler(struct ctl_table *table, int write,
		if (ret)
		goto undo;

		ret = sched_rt_global_constraints();
		ret = sched_dl_global_validate();
		if (ret)
		goto undo;

		ret = sched_dl_global_constraints();
		ret = sched_rt_global_constraints();
		if (ret)
		goto undo;