Commit 21228e45 authored by Linus Torvalds

Merge branch 'sched-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'sched-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  sched: Fix the irqtime code for 32bit
  sched: Fix the irqtime code to deal with u64 wraps
  nohz: Fix get_next_timer_interrupt() vs cpu hotplug
  Sched: fix skip_clock_update optimization
  sched: Cure more NO_HZ load average woes
parents 0a592281 8e92c201
include/linux/sched.h  +1 −1
@@ -143,7 +143,7 @@ extern unsigned long nr_iowait_cpu(int cpu);
 extern unsigned long this_cpu_load(void);
 
 
-extern void calc_global_load(void);
+extern void calc_global_load(unsigned long ticks);
 
 extern unsigned long get_parent_ip(unsigned long addr);
 
kernel/fork.c  +1 −0
@@ -273,6 +273,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 
 	setup_thread_stack(tsk, orig);
 	clear_user_return_notifier(tsk);
+	clear_tsk_need_resched(tsk);
 	stackend = end_of_stack(tsk);
 	*stackend = STACK_END_MAGIC;	/* for overflow detection */
 
kernel/sched.c  +237 −51
@@ -636,22 +636,18 @@ static inline struct task_group *task_group(struct task_struct *p)
 
 #endif /* CONFIG_CGROUP_SCHED */
 
-static u64 irq_time_cpu(int cpu);
-static void sched_irq_time_avg_update(struct rq *rq, u64 irq_time);
+static void update_rq_clock_task(struct rq *rq, s64 delta);
 
-inline void update_rq_clock(struct rq *rq)
+static void update_rq_clock(struct rq *rq)
 {
-	if (!rq->skip_clock_update) {
-		int cpu = cpu_of(rq);
-		u64 irq_time;
+	s64 delta;
 
-		rq->clock = sched_clock_cpu(cpu);
-		irq_time = irq_time_cpu(cpu);
-		if (rq->clock - irq_time > rq->clock_task)
-			rq->clock_task = rq->clock - irq_time;
+	if (rq->skip_clock_update)
+		return;
 
-		sched_irq_time_avg_update(rq, irq_time);
-	}
+	delta = sched_clock_cpu(cpu_of(rq)) - rq->clock;
+	rq->clock += delta;
+	update_rq_clock_task(rq, delta);
 }
 
 /*
@@ -1924,10 +1920,9 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
  * They are read and saved off onto struct rq in update_rq_clock().
  * This may result in other CPU reading this CPU's irq time and can
  * race with irq/account_system_vtime on this CPU. We would either get old
- * or new value (or semi updated value on 32 bit) with a side effect of
- * accounting a slice of irq time to wrong task when irq is in progress
- * while we read rq->clock. That is a worthy compromise in place of having
- * locks on each irq in account_system_time.
+ * or new value with a side effect of accounting a slice of irq time to wrong
+ * task when irq is in progress while we read rq->clock. That is a worthy
+ * compromise in place of having locks on each irq in account_system_time.
  */
 static DEFINE_PER_CPU(u64, cpu_hardirq_time);
 static DEFINE_PER_CPU(u64, cpu_softirq_time);
@@ -1945,19 +1940,58 @@ void disable_sched_clock_irqtime(void)
 	sched_clock_irqtime = 0;
 }
 
-static u64 irq_time_cpu(int cpu)
+#ifndef CONFIG_64BIT
+static DEFINE_PER_CPU(seqcount_t, irq_time_seq);
+
+static inline void irq_time_write_begin(void)
 {
-	if (!sched_clock_irqtime)
-		return 0;
+	__this_cpu_inc(irq_time_seq.sequence);
+	smp_wmb();
+}
+
+static inline void irq_time_write_end(void)
+{
+	smp_wmb();
+	__this_cpu_inc(irq_time_seq.sequence);
+}
+
+static inline u64 irq_time_read(int cpu)
+{
+	u64 irq_time;
+	unsigned seq;
+
+	do {
+		seq = read_seqcount_begin(&per_cpu(irq_time_seq, cpu));
+		irq_time = per_cpu(cpu_softirq_time, cpu) +
+			   per_cpu(cpu_hardirq_time, cpu);
+	} while (read_seqcount_retry(&per_cpu(irq_time_seq, cpu), seq));
+
+	return irq_time;
+}
+#else /* CONFIG_64BIT */
+static inline void irq_time_write_begin(void)
+{
+}
 
+static inline void irq_time_write_end(void)
+{
+}
+
+static inline u64 irq_time_read(int cpu)
+{
 	return per_cpu(cpu_softirq_time, cpu) + per_cpu(cpu_hardirq_time, cpu);
 }
+#endif /* CONFIG_64BIT */
 
+/*
+ * Called before incrementing preempt_count on {soft,}irq_enter
+ * and before decrementing preempt_count on {soft,}irq_exit.
+ */
 void account_system_vtime(struct task_struct *curr)
 {
 	unsigned long flags;
+	s64 delta;
 	int cpu;
-	u64 now, delta;
 
 	if (!sched_clock_irqtime)
 		return;
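The 32-bit path added above is a hand-rolled seqcount: the writer makes the sequence odd for the duration of the non-atomic 64-bit update, and readers retry until they observe the same even sequence before and after their reads. As a stand-alone illustration of the same idea (not part of this commit; the demo_* names and the use of C11 atomics are invented for the example, whereas the kernel uses seqcount_t, per-cpu variables and smp_wmb()):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static atomic_uint demo_seq;		/* plays the role of irq_time_seq */
static uint64_t demo_irq_time;		/* updated non-atomically, like the per-cpu u64s */

static void demo_write(uint64_t delta)
{
	atomic_fetch_add_explicit(&demo_seq, 1, memory_order_relaxed);	/* odd: update in progress */
	atomic_thread_fence(memory_order_release);			/* like smp_wmb() */
	demo_irq_time += delta;						/* may be two stores on 32-bit */
	atomic_thread_fence(memory_order_release);
	atomic_fetch_add_explicit(&demo_seq, 1, memory_order_relaxed);	/* even: update complete */
}

static uint64_t demo_read(void)
{
	unsigned int seq;
	uint64_t val;

	do {
		seq = atomic_load_explicit(&demo_seq, memory_order_acquire);
		val = demo_irq_time;
		atomic_thread_fence(memory_order_acquire);
	} while ((seq & 1) ||
		 seq != atomic_load_explicit(&demo_seq, memory_order_relaxed));

	return val;
}

int main(void)
{
	/* Single-threaded smoke test; a real demo would call demo_write()
	 * from a second thread while this one loops on demo_read(). */
	demo_write(123);
	demo_write(77);
	printf("irq time: %llu\n", (unsigned long long)demo_read());
	return 0;
}

On 64-bit configurations a u64 read is a single load, which is why the diff compiles the write_begin/write_end helpers away under CONFIG_64BIT.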
@@ -1965,9 +1999,10 @@ void account_system_vtime(struct task_struct *curr)
 	local_irq_save(flags);
 
 	cpu = smp_processor_id();
-	now = sched_clock_cpu(cpu);
-	delta = now - per_cpu(irq_start_time, cpu);
-	per_cpu(irq_start_time, cpu) = now;
+	delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time);
+	__this_cpu_add(irq_start_time, delta);
+
+	irq_time_write_begin();
 	/*
 	 * We do not account for softirq time from ksoftirqd here.
 	 * We want to continue accounting softirq time to ksoftirqd thread
@@ -1975,33 +2010,55 @@ void account_system_vtime(struct task_struct *curr)
 	 * that do not consume any time, but still wants to run.
 	 */
 	if (hardirq_count())
-		per_cpu(cpu_hardirq_time, cpu) += delta;
+		__this_cpu_add(cpu_hardirq_time, delta);
 	else if (in_serving_softirq() && !(curr->flags & PF_KSOFTIRQD))
-		per_cpu(cpu_softirq_time, cpu) += delta;
+		__this_cpu_add(cpu_softirq_time, delta);
 
+	irq_time_write_end();
 	local_irq_restore(flags);
 }
 EXPORT_SYMBOL_GPL(account_system_vtime);
 
-static void sched_irq_time_avg_update(struct rq *rq, u64 curr_irq_time)
+static void update_rq_clock_task(struct rq *rq, s64 delta)
 {
-	if (sched_clock_irqtime && sched_feat(NONIRQ_POWER)) {
-		u64 delta_irq = curr_irq_time - rq->prev_irq_time;
-		rq->prev_irq_time = curr_irq_time;
-		sched_rt_avg_update(rq, delta_irq);
-	}
+	s64 irq_delta;
+
+	irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time;
+
+	/*
+	 * Since irq_time is only updated on {soft,}irq_exit, we might run into
+	 * this case when a previous update_rq_clock() happened inside a
+	 * {soft,}irq region.
+	 *
+	 * When this happens, we stop ->clock_task and only update the
+	 * prev_irq_time stamp to account for the part that fit, so that a next
+	 * update will consume the rest. This ensures ->clock_task is
+	 * monotonic.
+	 *
+	 * It does however cause some slight miss-attribution of {soft,}irq
+	 * time, a more accurate solution would be to update the irq_time using
+	 * the current rq->clock timestamp, except that would require using
+	 * atomic ops.
+	 */
+	if (irq_delta > delta)
+		irq_delta = delta;
+
+	rq->prev_irq_time += irq_delta;
+	delta -= irq_delta;
+	rq->clock_task += delta;
+
+	if (irq_delta && sched_feat(NONIRQ_POWER))
+		sched_rt_avg_update(rq, irq_delta);
 }
 
-#else
+#else /* CONFIG_IRQ_TIME_ACCOUNTING */
 
-static u64 irq_time_cpu(int cpu)
+static void update_rq_clock_task(struct rq *rq, s64 delta)
 {
-	return 0;
+	rq->clock_task += delta;
 }
 
-static void sched_irq_time_avg_update(struct rq *rq, u64 curr_irq_time) { }
-
-#endif
+#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
 
 #include "sched_idletask.c"
 #include "sched_fair.c"
@@ -2129,7 +2186,7 @@ static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
 	 * A queue event has occurred, and we're going to schedule.  In
 	 * this case, we can save a useless back to back clock update.
 	 */
-	if (test_tsk_need_resched(rq->curr))
+	if (rq->curr->se.on_rq && test_tsk_need_resched(rq->curr))
 		rq->skip_clock_update = 1;
 }
 
@@ -3119,6 +3176,15 @@ static long calc_load_fold_active(struct rq *this_rq)
 	return delta;
 }
 
+static unsigned long
+calc_load(unsigned long load, unsigned long exp, unsigned long active)
+{
+	load *= exp;
+	load += active * (FIXED_1 - exp);
+	load += 1UL << (FSHIFT - 1);
+	return load >> FSHIFT;
+}
+
 #ifdef CONFIG_NO_HZ
 /*
  * For NO_HZ we delay the active fold to the next LOAD_FREQ update.
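The calc_load() helper added above is a fixed-point exponential moving average: load and active are scaled by FIXED_1 = 2^FSHIFT, and exp is the per-cycle decay factor (EXP_1 = 1884 out of 2048 for the 1-minute average). A user-space sketch of the same arithmetic (demo_calc_load() is a local copy of the helper; the constants are the ones published in include/linux/sched.h):

#include <stdio.h>

#define FSHIFT	11
#define FIXED_1	(1UL << FSHIFT)
#define EXP_1	1884	/* 1-minute decay factor, fixed point */

static unsigned long demo_calc_load(unsigned long load, unsigned long exp,
				    unsigned long active)
{
	load *= exp;
	load += active * (FIXED_1 - exp);
	load += 1UL << (FSHIFT - 1);	/* round to nearest */
	return load >> FSHIFT;
}

int main(void)
{
	unsigned long avenrun = 0;		/* fixed-point load average */
	unsigned long active = 3 * FIXED_1;	/* 3 runnable tasks */
	int i;

	/* One iteration per LOAD_FREQ interval; the average decays toward 3.00. */
	for (i = 1; i <= 5; i++) {
		avenrun = demo_calc_load(avenrun, EXP_1, active);
		printf("cycle %d: %lu.%02lu\n", i, avenrun >> FSHIFT,
		       ((avenrun & (FIXED_1 - 1)) * 100) >> FSHIFT);
	}
	return 0;
}

The load += 1UL << (FSHIFT - 1) rounding term is what this version adds over the old calc_load() that a later hunk removes from near get_avenrun().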
@@ -3148,6 +3214,128 @@ static long calc_load_fold_idle(void)
 
 	return delta;
 }
+
+/**
+ * fixed_power_int - compute: x^n, in O(log n) time
+ *
+ * @x:         base of the power
+ * @frac_bits: fractional bits of @x
+ * @n:         power to raise @x to.
+ *
+ * By exploiting the relation between the definition of the natural power
+ * function: x^n := x*x*...*x (x multiplied by itself for n times), and
+ * the binary encoding of numbers used by computers: n := \Sum n_i * 2^i,
+ * (where: n_i \elem {0, 1}, the binary vector representing n),
+ * we find: x^n := x^(\Sum n_i * 2^i) := \Prod x^(n_i * 2^i), which is
+ * of course trivially computable in O(log_2 n), the length of our binary
+ * vector.
+ */
+static unsigned long
+fixed_power_int(unsigned long x, unsigned int frac_bits, unsigned int n)
+{
+	unsigned long result = 1UL << frac_bits;
+
+	if (n) for (;;) {
+		if (n & 1) {
+			result *= x;
+			result += 1UL << (frac_bits - 1);
+			result >>= frac_bits;
+		}
+		n >>= 1;
+		if (!n)
+			break;
+		x *= x;
+		x += 1UL << (frac_bits - 1);
+		x >>= frac_bits;
+	}
+
+	return result;
+}
+
+/*
+ * a1 = a0 * e + a * (1 - e)
+ *
+ * a2 = a1 * e + a * (1 - e)
+ *    = (a0 * e + a * (1 - e)) * e + a * (1 - e)
+ *    = a0 * e^2 + a * (1 - e) * (1 + e)
+ *
+ * a3 = a2 * e + a * (1 - e)
+ *    = (a0 * e^2 + a * (1 - e) * (1 + e)) * e + a * (1 - e)
+ *    = a0 * e^3 + a * (1 - e) * (1 + e + e^2)
+ *
+ *  ...
+ *
+ * an = a0 * e^n + a * (1 - e) * (1 + e + ... + e^n-1) [1]
+ *    = a0 * e^n + a * (1 - e) * (1 - e^n)/(1 - e)
+ *    = a0 * e^n + a * (1 - e^n)
+ *
+ * [1] application of the geometric series:
+ *
+ *              n         1 - x^(n+1)
+ *     S_n := \Sum x^i = -------------
+ *             i=0          1 - x
+ */
+static unsigned long
+calc_load_n(unsigned long load, unsigned long exp,
+	    unsigned long active, unsigned int n)
+{
+
+	return calc_load(load, fixed_power_int(exp, FSHIFT, n), active);
+}
+
+/*
+ * NO_HZ can leave us missing all per-cpu ticks calling
+ * calc_load_account_active(), but since an idle CPU folds its delta into
+ * calc_load_tasks_idle per calc_load_account_idle(), all we need to do is fold
+ * in the pending idle delta if our idle period crossed a load cycle boundary.
+ *
+ * Once we've updated the global active value, we need to apply the exponential
+ * weights adjusted to the number of cycles missed.
+ */
+static void calc_global_nohz(unsigned long ticks)
+{
+	long delta, active, n;
+
+	if (time_before(jiffies, calc_load_update))
+		return;
+
+	/*
+	 * If we crossed a calc_load_update boundary, make sure to fold
+	 * any pending idle changes, the respective CPUs might have
+	 * missed the tick driven calc_load_account_active() update
+	 * due to NO_HZ.
+	 */
+	delta = calc_load_fold_idle();
+	if (delta)
+		atomic_long_add(delta, &calc_load_tasks);
+
+	/*
+	 * If we were idle for multiple load cycles, apply them.
+	 */
+	if (ticks >= LOAD_FREQ) {
+		n = ticks / LOAD_FREQ;
+
+		active = atomic_long_read(&calc_load_tasks);
+		active = active > 0 ? active * FIXED_1 : 0;
+
+		avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n);
+		avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n);
+		avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n);
+
+		calc_load_update += n * LOAD_FREQ;
+	}
+
+	/*
+	 * Its possible the remainder of the above division also crosses
+	 * a LOAD_FREQ period, the regular check in calc_global_load()
+	 * which comes after this will take care of that.
+	 *
+	 * Consider us being 11 ticks before a cycle completion, and us
+	 * sleeping for 4*LOAD_FREQ + 22 ticks, then the above code will
+	 * age us 4 cycles, and the test in calc_global_load() will
+	 * pick up the final one.
+	 */
+}
 #else
 static void calc_load_account_idle(struct rq *this_rq)
 {
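fixed_power_int() and calc_load_n() above let calc_global_nohz() fold n missed LOAD_FREQ cycles in a single step: decaying n times by exp is the same as decaying once by exp^n, which is what the geometric-series comment derives. A user-space sketch checking that claim numerically (fp_pow() and step() are local copies of fixed_power_int() and calc_load(), renamed for the example):

#include <stdio.h>

#define FSHIFT	11
#define FIXED_1	(1UL << FSHIFT)
#define EXP_1	1884

static unsigned long fp_pow(unsigned long x, unsigned int frac_bits, unsigned int n)
{
	unsigned long result = 1UL << frac_bits;	/* fixed-point 1.0 */

	while (n) {
		if (n & 1) {
			result *= x;
			result += 1UL << (frac_bits - 1);
			result >>= frac_bits;
		}
		n >>= 1;
		if (!n)
			break;
		x *= x;
		x += 1UL << (frac_bits - 1);
		x >>= frac_bits;
	}
	return result;
}

static unsigned long step(unsigned long load, unsigned long exp, unsigned long active)
{
	load *= exp;
	load += active * (FIXED_1 - exp);
	load += 1UL << (FSHIFT - 1);
	return load >> FSHIFT;
}

int main(void)
{
	unsigned long load = 5 * FIXED_1;	/* average was 5.00 ... */
	unsigned long active = 0;		/* ... then the machine went idle */
	unsigned long iter = load, once;
	unsigned int n = 12, i;

	for (i = 0; i < n; i++)			/* n one-cycle updates */
		iter = step(iter, EXP_1, active);
	once = step(load, fp_pow(EXP_1, FSHIFT, n), active);	/* one folded update */

	printf("iterated: %lu.%02lu  folded: %lu.%02lu\n",
	       iter >> FSHIFT, ((iter & (FIXED_1 - 1)) * 100) >> FSHIFT,
	       once >> FSHIFT, ((once & (FIXED_1 - 1)) * 100) >> FSHIFT);
	return 0;
}

The two results agree to within a fixed-point rounding step, which is all calc_global_nohz() needs when it ages avenrun[] by several cycles at once.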
@@ -3157,6 +3345,10 @@ static inline long calc_load_fold_idle(void)
 {
 	return 0;
 }
+
+static void calc_global_nohz(unsigned long ticks)
+{
+}
 #endif
 
 /**
@@ -3174,24 +3366,17 @@ void get_avenrun(unsigned long *loads, unsigned long offset, int shift)
 	loads[2] = (avenrun[2] + offset) << shift;
 }
 
-static unsigned long
-calc_load(unsigned long load, unsigned long exp, unsigned long active)
-{
-	load *= exp;
-	load += active * (FIXED_1 - exp);
-	return load >> FSHIFT;
-}
-
 /*
  * calc_load - update the avenrun load estimates 10 ticks after the
  * CPUs have updated calc_load_tasks.
  */
-void calc_global_load(void)
+void calc_global_load(unsigned long ticks)
 {
-	unsigned long upd = calc_load_update + 10;
 	long active;
 
-	if (time_before(jiffies, upd))
+	calc_global_nohz(ticks);
+
+	if (time_before(jiffies, calc_load_update + 10))
 		return;
 
 	active = atomic_long_read(&calc_load_tasks);
@@ -3845,7 +4030,6 @@ static void put_prev_task(struct rq *rq, struct task_struct *prev)
 {
 	if (prev->se.on_rq)
 		update_rq_clock(rq);
-	rq->skip_clock_update = 0;
 	prev->sched_class->put_prev_task(rq, prev);
 }
 
@@ -3903,7 +4087,6 @@ need_resched_nonpreemptible:
 		hrtick_clear(rq);
 
 	raw_spin_lock_irq(&rq->lock);
-	clear_tsk_need_resched(prev);
 
 	switch_count = &prev->nivcsw;
 	if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
@@ -3935,6 +4118,8 @@ need_resched_nonpreemptible:
 
 	put_prev_task(rq, prev);
 	next = pick_next_task(rq);
+	clear_tsk_need_resched(prev);
+	rq->skip_clock_update = 0;
 
 	if (likely(prev != next)) {
 		sched_info_switch(prev, next);
@@ -3943,6 +4128,7 @@ need_resched_nonpreemptible:
 		rq->nr_switches++;
 		rq->curr = next;
 		++*switch_count;
+		WARN_ON_ONCE(test_tsk_need_resched(next));
 
 		context_switch(rq, prev, next); /* unlocks the rq */
 		/*
kernel/timer.c  +7 −1
@@ -1252,6 +1252,12 @@ unsigned long get_next_timer_interrupt(unsigned long now)
 	struct tvec_base *base = __get_cpu_var(tvec_bases);
 	unsigned long expires;
 
+	/*
+	 * Pretend that there is no timer pending if the cpu is offline.
+	 * Possible pending timers will be migrated later to an active cpu.
+	 */
+	if (cpu_is_offline(smp_processor_id()))
+		return now + NEXT_TIMER_MAX_DELTA;
 	spin_lock(&base->lock);
 	if (time_before_eq(base->next_timer, base->timer_jiffies))
 		base->next_timer = __next_timer_interrupt(base);
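With this change an offline CPU reports "no pending timer" by returning now + NEXT_TIMER_MAX_DELTA, an expiry far enough in the future that the nohz code will not act on a stale timer of a dead CPU. Jiffies values like this are compared with the wrap-safe time_before()/time_before_eq() helpers, as in the check just below; a small reminder of why those survive counter wrap (illustration only, demo_time_before() is a local stand-in, not kernel code):

#include <stdio.h>

#define demo_time_after(a, b)	((long)((b) - (a)) < 0)
#define demo_time_before(a, b)	demo_time_after(b, a)

int main(void)
{
	unsigned long just_before_wrap = ~0UL - 5;	/* jiffies close to wrapping */
	unsigned long just_after_wrap  = 10;		/* jiffies shortly after the wrap */

	/* Naive "<" gets this backwards; the signed-difference form does not. */
	printf("naive:     %d\n", just_before_wrap < just_after_wrap);
	printf("wrap-safe: %d\n", demo_time_before(just_before_wrap, just_after_wrap));
	return 0;
}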
@@ -1319,7 +1325,7 @@ void do_timer(unsigned long ticks)
 {
 	jiffies_64 += ticks;
 	update_wall_time();
-	calc_global_load();
+	calc_global_load(ticks);
 }
 
 #ifdef __ARCH_WANT_SYS_ALARM