
Commit 7125face authored by Linus Torvalds

Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

* 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched, x86: Avoid unnecessary overflow in sched_clock
  sched: Fix buglet in return_cfs_rq_runtime()
  sched: Avoid SMT siblings in select_idle_sibling() if possible
  sched: Set the command name of the idle tasks in SMP kernels
  sched, rt: Provide means of disabling cross-cpu bandwidth sharing
  sched: Document wait_for_completion_*() return values
  sched_fair: Fix a typo in the comment describing update_sd_lb_stats
  sched: Add a comment to effective_load() since it's a pain
parents 35337c83 4cecf6d4
+22 −1
@@ -32,6 +32,22 @@ extern int no_timer_check;
  *  (mathieu.desnoyers@polymtl.ca)
  *
  *			-johnstul@us.ibm.com "math is hard, lets go shopping!"
+ *
+ * In:
+ *
+ * ns = cycles * cyc2ns_scale / SC
+ *
+ * Although we may still have enough bits to store the value of ns,
+ * in some cases, we may not have enough bits to store cycles * cyc2ns_scale,
+ * leading to an incorrect result.
+ *
+ * To avoid this, we can decompose 'cycles' into quotient and remainder
+ * of division by SC.  Then,
+ *
+ * ns = (quot * SC + rem) * cyc2ns_scale / SC
+ *    = quot * cyc2ns_scale + (rem * cyc2ns_scale) / SC
+ *
+ *			- sqazi@google.com
  */
 
 DECLARE_PER_CPU(unsigned long, cyc2ns);
@@ -41,9 +57,14 @@ DECLARE_PER_CPU(unsigned long long, cyc2ns_offset);
 
 static inline unsigned long long __cycles_2_ns(unsigned long long cyc)
 {
+	unsigned long long quot;
+	unsigned long long rem;
 	int cpu = smp_processor_id();
 	unsigned long long ns = per_cpu(cyc2ns_offset, cpu);
-	ns += cyc * per_cpu(cyc2ns, cpu) >> CYC2NS_SCALE_FACTOR;
+	quot = (cyc >> CYC2NS_SCALE_FACTOR);
+	rem = cyc & ((1ULL << CYC2NS_SCALE_FACTOR) - 1);
+	ns += quot * per_cpu(cyc2ns, cpu) +
+		((rem * per_cpu(cyc2ns, cpu)) >> CYC2NS_SCALE_FACTOR);
 	return ns;
 }
 
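A quick way to see the overflow the new comment guards against: with 64-bit arithmetic, cycles * cyc2ns_scale can wrap long before the final shift, while the quotient/remainder split keeps every intermediate product in range. The following standalone user-space sketch illustrates the decomposition; SC_SHIFT and the sample values are arbitrary stand-ins, not the kernel's actual constants.

/*
 * Standalone user-space sketch (not kernel code) of the overflow-avoidance
 * trick above. SC_SHIFT and the sample values are made up for illustration.
 */
#include <stdio.h>
#include <stdint.h>

#define SC_SHIFT 10                      /* stand-in for CYC2NS_SCALE_FACTOR */
#define SC (1ULL << SC_SHIFT)

/* naive: cyc * scale may wrap around before the shift */
static uint64_t cycles_to_ns_naive(uint64_t cyc, uint64_t scale)
{
	return (cyc * scale) >> SC_SHIFT;
}

/* decomposed: quot * scale stays in range, only rem * scale is shifted */
static uint64_t cycles_to_ns_split(uint64_t cyc, uint64_t scale)
{
	uint64_t quot = cyc >> SC_SHIFT;
	uint64_t rem  = cyc & (SC - 1);

	return quot * scale + ((rem * scale) >> SC_SHIFT);
}

int main(void)
{
	/* a large cycle count, e.g. a TSC value after long uptime */
	uint64_t cyc   = 0xffffffffffffULL * 1000;	/* big enough to overflow the naive product */
	uint64_t scale = 1024 * 3;			/* arbitrary cyc2ns-style factor */

	printf("naive : %llu\n", (unsigned long long)cycles_to_ns_naive(cyc, scale));
	printf("split : %llu\n", (unsigned long long)cycles_to_ns_split(cyc, scale));
	return 0;
}
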
+3 −1
@@ -126,6 +126,8 @@ extern struct cred init_cred;
 # define INIT_PERF_EVENTS(tsk)
 #endif
 
+#define INIT_TASK_COMM "swapper"
+
 /*
  *  INIT_TASK is used to set up the first task table, touch at
  * your own risk!. Base=0, limit=0x1fffff (=2MB)
@@ -162,7 +164,7 @@ extern struct cred init_cred;
 	.group_leader	= &tsk,						\
 	RCU_INIT_POINTER(.real_cred, &init_cred),			\
 	RCU_INIT_POINTER(.cred, &init_cred),				\
-	.comm		= "swapper",					\
+	.comm		= INIT_TASK_COMM,				\
 	.thread		= INIT_THREAD,					\
 	.fs		= &init_fs,					\
 	.files		= &init_files,					\
+17 −0
@@ -71,6 +71,7 @@
 #include <linux/ctype.h>
 #include <linux/ftrace.h>
 #include <linux/slab.h>
+#include <linux/init_task.h>
 
 #include <asm/tlb.h>
 #include <asm/irq_regs.h>
@@ -4810,6 +4811,9 @@ EXPORT_SYMBOL(wait_for_completion);
  * This waits for either a completion of a specific task to be signaled or for a
  * specified timeout to expire. The timeout is in jiffies. It is not
  * interruptible.
+ *
+ * The return value is 0 if timed out, and positive (at least 1, or number of
+ * jiffies left till timeout) if completed.
  */
 unsigned long __sched
 wait_for_completion_timeout(struct completion *x, unsigned long timeout)
@@ -4824,6 +4828,8 @@ EXPORT_SYMBOL(wait_for_completion_timeout);
  *
  * This waits for completion of a specific task to be signaled. It is
  * interruptible.
+ *
+ * The return value is -ERESTARTSYS if interrupted, 0 if completed.
  */
 int __sched wait_for_completion_interruptible(struct completion *x)
 {
@@ -4841,6 +4847,9 @@ EXPORT_SYMBOL(wait_for_completion_interruptible);
  *
  * This waits for either a completion of a specific task to be signaled or for a
  * specified timeout to expire. It is interruptible. The timeout is in jiffies.
+ *
+ * The return value is -ERESTARTSYS if interrupted, 0 if timed out,
+ * positive (at least 1, or number of jiffies left till timeout) if completed.
  */
 long __sched
 wait_for_completion_interruptible_timeout(struct completion *x,
@@ -4856,6 +4865,8 @@ EXPORT_SYMBOL(wait_for_completion_interruptible_timeout);
  *
  * This waits to be signaled for completion of a specific task. It can be
  * interrupted by a kill signal.
+ *
+ * The return value is -ERESTARTSYS if interrupted, 0 if completed.
  */
 int __sched wait_for_completion_killable(struct completion *x)
 {
@@ -4874,6 +4885,9 @@ EXPORT_SYMBOL(wait_for_completion_killable);
  * This waits for either a completion of a specific task to be
  * signaled or for a specified timeout to expire. It can be
  * interrupted by a kill signal. The timeout is in jiffies.
+ *
+ * The return value is -ERESTARTSYS if interrupted, 0 if timed out,
+ * positive (at least 1, or number of jiffies left till timeout) if completed.
  */
 long __sched
 wait_for_completion_killable_timeout(struct completion *x,
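The return-value rules documented above translate into a standard call-site pattern. The sketch below is illustrative only; the function and the 100ms timeout are hypothetical and not part of this commit, but the wait_for_completion_interruptible_timeout() semantics match the kerneldoc just added.

/*
 * Illustrative only (not from this commit): how a caller typically acts on
 * the return values documented above. 'done' would be a completion that
 * some other context signals with complete().
 */
#include <linux/completion.h>
#include <linux/errno.h>
#include <linux/jiffies.h>

static int example_wait_for_done(struct completion *done)
{
	long ret;

	ret = wait_for_completion_interruptible_timeout(done,
							msecs_to_jiffies(100));
	if (ret < 0)			/* -ERESTARTSYS: interrupted by a signal */
		return ret;
	if (ret == 0)			/* timed out */
		return -ETIMEDOUT;

	/* ret > 0: completed, with 'ret' jiffies left before the timeout */
	return 0;
}
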
@@ -6099,6 +6113,9 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
 	 */
 	idle->sched_class = &idle_sched_class;
 	ftrace_graph_init_idle_task(idle, cpu);
+#if defined(CONFIG_SMP)
+	sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu);
+#endif
 }
 
 /*
+125 −34
@@ -772,19 +772,32 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
 		list_del_leaf_cfs_rq(cfs_rq);
 }
 
+static inline long calc_tg_weight(struct task_group *tg, struct cfs_rq *cfs_rq)
+{
+	long tg_weight;
+
+	/*
+	 * Use this CPU's actual weight instead of the last load_contribution
+	 * to gain a more accurate current total weight. See
+	 * update_cfs_rq_load_contribution().
+	 */
+	tg_weight = atomic_read(&tg->load_weight);
+	tg_weight -= cfs_rq->load_contribution;
+	tg_weight += cfs_rq->load.weight;
+
+	return tg_weight;
+}
+
 static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg)
 {
-	long load_weight, load, shares;
+	long tg_weight, load, shares;
 
+	tg_weight = calc_tg_weight(tg, cfs_rq);
 	load = cfs_rq->load.weight;
 
-	load_weight = atomic_read(&tg->load_weight);
-	load_weight += load;
-	load_weight -= cfs_rq->load_contribution;
-
 	shares = (tg->shares * load);
-	if (load_weight)
-		shares /= load_weight;
+	if (tg_weight)
+		shares /= tg_weight;
 
 	if (shares < MIN_SHARES)
 		shares = MIN_SHARES;
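For a concrete feel of the formula above, here is a toy user-space arithmetic check with made-up numbers; the values (and the MIN_SHARES stand-in) are chosen only to make the division easy to follow, not taken from the kernel.

/*
 * Toy check of the calc_cfs_shares() formula above, with made-up numbers:
 * this cpu carries 1024 of a 3072 total group weight, so it should receive
 * roughly a third of the group's configured shares.
 */
#include <stdio.h>

int main(void)
{
	long tg_shares  = 1024;		/* group's configured shares (tg->shares)   */
	long tg_weight  = 3072;		/* sum of the group's per-cpu rq weights     */
	long load       = 1024;		/* this cpu's cfs_rq->load.weight            */
	long min_shares = 2;		/* stand-in for MIN_SHARES                   */

	long shares = tg_shares * load;
	if (tg_weight)
		shares /= tg_weight;
	if (shares < min_shares)
		shares = min_shares;

	/* 1024 * 1024 / 3072 = 341 */
	printf("shares = %ld\n", shares);
	return 0;
}
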
@@ -1743,7 +1756,7 @@ static void __return_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 
 static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 {
-	if (!cfs_rq->runtime_enabled || !cfs_rq->nr_running)
+	if (!cfs_rq->runtime_enabled || cfs_rq->nr_running)
 		return;
 
 	__return_cfs_rq_runtime(cfs_rq);
@@ -2036,36 +2049,100 @@ static void task_waking_fair(struct task_struct *p)
  * Adding load to a group doesn't make a group heavier, but can cause movement
  * of group shares between cpus. Assuming the shares were perfectly aligned one
  * can calculate the shift in shares.
+ *
+ * Calculate the effective load difference if @wl is added (subtracted) to @tg
+ * on this @cpu and results in a total addition (subtraction) of @wg to the
+ * total group weight.
+ *
+ * Given a runqueue weight distribution (rw_i) we can compute a shares
+ * distribution (s_i) using:
+ *
+ *   s_i = rw_i / \Sum rw_j						(1)
+ *
+ * Suppose we have 4 CPUs and our @tg is a direct child of the root group and
+ * has 7 equal weight tasks, distributed as below (rw_i), with the resulting
+ * shares distribution (s_i):
+ *
+ *   rw_i = {   2,   4,   1,   0 }
+ *   s_i  = { 2/7, 4/7, 1/7,   0 }
+ *
+ * As per wake_affine() we're interested in the load of two CPUs (the CPU the
+ * task used to run on and the CPU the waker is running on), we need to
+ * compute the effect of waking a task on either CPU and, in case of a sync
+ * wakeup, compute the effect of the current task going to sleep.
+ *
+ * So for a change of @wl to the local @cpu with an overall group weight change
+ * of @wl we can compute the new shares distribution (s'_i) using:
+ *
+ *   s'_i = (rw_i + @wl) / (@wg + \Sum rw_j)				(2)
+ *
+ * Suppose we're interested in CPUs 0 and 1, and want to compute the load
+ * differences in waking a task to CPU 0. The additional task changes the
+ * weight and shares distributions like:
+ *
+ *   rw'_i = {   3,   4,   1,   0 }
+ *   s'_i  = { 3/8, 4/8, 1/8,   0 }
+ *
+ * We can then compute the difference in effective weight by using:
+ *
+ *   dw_i = S * (s'_i - s_i)						(3)
+ *
+ * Where 'S' is the group weight as seen by its parent.
+ *
+ * Therefore the effective change in loads on CPU 0 would be 5/56 (3/8 - 2/7)
+ * times the weight of the group. The effect on CPU 1 would be -4/56 (4/8 -
+ * 4/7) times the weight of the group.
  */
 static long effective_load(struct task_group *tg, int cpu, long wl, long wg)
 {
 	struct sched_entity *se = tg->se[cpu];
 
-	if (!tg->parent)
+	if (!tg->parent)	/* the trivial, non-cgroup case */
 		return wl;
 
 	for_each_sched_entity(se) {
-		long lw, w;
+		long w, W;
 
 		tg = se->my_q->tg;
-		w = se->my_q->load.weight;
 
-		/* use this cpu's instantaneous contribution */
-		lw = atomic_read(&tg->load_weight);
-		lw -= se->my_q->load_contribution;
-		lw += w + wg;
+		/*
+		 * W = @wg + \Sum rw_j
+		 */
+		W = wg + calc_tg_weight(tg, se->my_q);
 
-		wl += w;
+		/*
+		 * w = rw_i + @wl
+		 */
+		w = se->my_q->load.weight + wl;
 
-		if (lw > 0 && wl < lw)
-			wl = (wl * tg->shares) / lw;
+		/*
+		 * wl = S * s'_i; see (2)
+		 */
+		if (W > 0 && w < W)
+			wl = (w * tg->shares) / W;
 		else
 			wl = tg->shares;
 
-		/* zero point is MIN_SHARES */
+		/*
+		 * Per the above, wl is the new se->load.weight value; since
+		 * those are clipped to [MIN_SHARES, ...) do so now. See
+		 * calc_cfs_shares().
+		 */
 		if (wl < MIN_SHARES)
 			wl = MIN_SHARES;
+
+		/*
+		 * wl = dw_i = S * (s'_i - s_i); see (3)
+		 */
 		wl -= se->load.weight;
+
+		/*
+		 * Recursively apply this logic to all parent groups to compute
+		 * the final effective load change on the root group. Since
+		 * only the @tg group gets extra weight, all parent groups can
+		 * only redistribute existing shares. @wl is the shift in shares
+		 * resulting from this level per the above.
+		 */
 		wg = 0;
 	}
 
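The 4-CPU example in the new comment can be reproduced with a few lines of floating-point arithmetic. The sketch below is not kernel code; it only evaluates equations (1) through (3) for the documented rw_i distribution, leaving S symbolic as "the group weight seen by the parent".

/*
 * Standalone check of the effective_load() example above: 4 CPUs,
 * rw_i = {2, 4, 1, 0} (in units of one task weight), one task added on CPU 0.
 */
#include <stdio.h>

int main(void)
{
	double rw[4] = { 2, 4, 1, 0 };		/* per-cpu runqueue weights      */
	double wl = 1, wg = 1;			/* add one task's weight on cpu0 */
	double sum = rw[0] + rw[1] + rw[2] + rw[3];

	for (int i = 0; i < 4; i++) {
		double s     = rw[i] / sum;				/* (1) */
		double s_new = (rw[i] + (i == 0 ? wl : 0)) /
			       (sum + wg);				/* (2) */
		double dw    = s_new - s;		/* (3), per unit of S */

		printf("cpu%d: s=%.4f s'=%.4f dw=%+.4f (*S)\n", i, s, s_new, dw);
	}
	/* Expect cpu0: +5/56 = +0.0893 and cpu1: -4/56 = -0.0714, as in the comment. */
	return 0;
}
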
@@ -2249,7 +2326,8 @@ static int select_idle_sibling(struct task_struct *p, int target)
 	int cpu = smp_processor_id();
 	int prev_cpu = task_cpu(p);
 	struct sched_domain *sd;
-	int i;
+	struct sched_group *sg;
+	int i, smt = 0;
 
 	/*
 	 * If the task is going to be woken-up on this cpu and if it is
@@ -2269,25 +2347,38 @@ static int select_idle_sibling(struct task_struct *p, int target)
 	 * Otherwise, iterate the domains and find an elegible idle cpu.
 	 */
 	rcu_read_lock();
+again:
 	for_each_domain(target, sd) {
-		if (!(sd->flags & SD_SHARE_PKG_RESOURCES))
-			break;
+		if (!smt && (sd->flags & SD_SHARE_CPUPOWER))
+			continue;
 
-		for_each_cpu_and(i, sched_domain_span(sd), tsk_cpus_allowed(p)) {
-			if (idle_cpu(i)) {
-				target = i;
-				break;
+		if (!(sd->flags & SD_SHARE_PKG_RESOURCES)) {
+			if (!smt) {
+				smt = 1;
+				goto again;
 			}
+			break;
 		}
 
-		/*
-		 * Lets stop looking for an idle sibling when we reached
-		 * the domain that spans the current cpu and prev_cpu.
-		 */
-		if (cpumask_test_cpu(cpu, sched_domain_span(sd)) &&
-		    cpumask_test_cpu(prev_cpu, sched_domain_span(sd)))
-			break;
+		sg = sd->groups;
+		do {
+			if (!cpumask_intersects(sched_group_cpus(sg),
+						tsk_cpus_allowed(p)))
+				goto next;
+
+			for_each_cpu(i, sched_group_cpus(sg)) {
+				if (!idle_cpu(i))
+					goto next;
+			}
+
+			target = cpumask_first_and(sched_group_cpus(sg),
+					tsk_cpus_allowed(p));
+			goto done;
+next:
+			sg = sg->next;
+		} while (sg != sd->groups);
 	}
+done:
 	rcu_read_unlock();
 
 	return target;
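The reworked loop above first looks for a sched_group (a core) whose CPUs are all idle, and only once that search has failed does it settle for an idle SMT sibling. The toy sketch below illustrates that selection order on a made-up 8-CPU, 2-way-SMT topology; it is not the kernel implementation, which walks sched_domains and sched_groups and honours the task's allowed cpumask.

/*
 * Toy illustration of the "prefer a fully idle core" policy above.
 * Topology and idle state are made-up arrays.
 */
#include <stdio.h>

#define NR_CPUS   8
#define SMT_WIDTH 2			/* 2 hardware threads per core */

static int cpu_idle[NR_CPUS] = { 0, 1, 1, 1, 0, 1, 0, 0 };

/* return the first cpu of a core whose threads are all idle, or -1 */
static int find_idle_core(void)
{
	for (int core = 0; core < NR_CPUS / SMT_WIDTH; core++) {
		int first = core * SMT_WIDTH;
		int all_idle = 1;

		for (int t = 0; t < SMT_WIDTH; t++)
			all_idle &= cpu_idle[first + t];
		if (all_idle)
			return first;
	}
	return -1;
}

/* fallback: any idle SMT sibling at all */
static int find_idle_thread(void)
{
	for (int cpu = 0; cpu < NR_CPUS; cpu++)
		if (cpu_idle[cpu])
			return cpu;
	return -1;
}

int main(void)
{
	int target = find_idle_core();

	if (target < 0)
		target = find_idle_thread();
	/* cpus {2,3} form the only fully idle core, so target is 2 */
	printf("target = %d\n", target);
	return 0;
}
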
@@ -3511,7 +3602,7 @@ static bool update_sd_pick_busiest(struct sched_domain *sd,
 }
 
 /**
- * update_sd_lb_stats - Update sched_group's statistics for load balancing.
+ * update_sd_lb_stats - Update sched_domain's statistics for load balancing.
  * @sd: sched_domain whose statistics are to be updated.
  * @this_cpu: Cpu for which load balance is currently performed.
  * @idle: Idle status of this_cpu
+1 −0
@@ -67,3 +67,4 @@ SCHED_FEAT(NONTASK_POWER, 1)
 SCHED_FEAT(TTWU_QUEUE, 1)
 
 SCHED_FEAT(FORCE_SD_OVERLAP, 0)
+SCHED_FEAT(RT_RUNTIME_SHARE, 1)