Merge branch 'sched/core' into timers/nohz, to avoid conflicts in upcoming patches (cefef3a7) · Commits · e / devices / android_kernel_fairphone_FP3

arch/x86/kernel/kvm.c

+0 −2

Original line number	Diff line number	Diff line
		@@ -301,8 +301,6 @@ static void kvm_register_steal_time(void)
		if (!has_steal_clock)
		return;

		memset(st, 0, sizeof(*st));

		wrmsrl(MSR_KVM_STEAL_TIME, (slow_virt_to_phys(st) | KVM_MSR_ENABLED));
		pr_info("kvm-stealtime: cpu %d, msr %llx\n",
		cpu, (unsigned long long) slow_virt_to_phys(st));

include/linux/sched.h

+6 −2

Original line number	Diff line number	Diff line
		@@ -219,9 +219,10 @@ extern void proc_sched_set_task(struct task_struct *p);
		#define TASK_WAKING 256
		#define TASK_PARKED 512
		#define TASK_NOLOAD 1024
		#define TASK_STATE_MAX 2048
		#define TASK_NEW 2048
		#define TASK_STATE_MAX 4096

		#define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWPN"
		#define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWPNn"

		extern char ___assert_task_state[1 - 2*!!(
		sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)];
		@@ -2139,6 +2140,9 @@ static inline void put_task_struct(struct task_struct *t)
		__put_task_struct(t);
		}

		struct task_struct *task_rcu_dereference(struct task_struct **ptask);
		struct task_struct *try_get_task_struct(struct task_struct **ptask);

		#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
		extern void task_cputime(struct task_struct *t,
		cputime_t utime, cputime_t stime);

kernel/exit.c

+76 −0

Original line number	Diff line number	Diff line
		@@ -210,6 +210,82 @@ void release_task(struct task_struct *p)
		goto repeat;
		}

		/*
		 * task_rcu_dereference - fetch the task published at *ptask and check
		 * that it has not been released.
		 * @ptask: location of an RCU-published task_struct pointer.
		 *
		 * Meant to run inside an RCU read-side critical section;
		 * try_get_task_struct() wraps the call in rcu_read_lock().
		 *
		 * Returns NULL if *ptask is NULL or if task->sighand reads as NULL,
		 * otherwise the task pointer that was read from *ptask.
		 *
		 * Note that if this function returns a valid task_struct pointer (!NULL)
		 * task->usage must remain >0 for the duration of the RCU critical section.
		 */
		struct task_struct *task_rcu_dereference(struct task_struct **ptask)
		{
			/*
			 * Start from NULL so that, should the probe below leave the
			 * variable untouched, we report "task is gone" instead of
			 * testing an indeterminate value.
			 */
			struct sighand_struct *sighand = NULL;
			struct task_struct *task;

			/*
			 * We need to verify that release_task() was not called and thus
			 * delayed_put_task_struct() can't run and drop the last reference
			 * before rcu_read_unlock(). We check task->sighand != NULL,
			 * but we can read the already freed and reused memory.
			 */
		retry:
			task = rcu_dereference(*ptask);
			if (!task)
				return NULL;

			/* The memory may already be freed, hence the fault-tolerant read. */
			probe_kernel_address(&task->sighand, sighand);

			/*
			 * Pairs with atomic_dec_and_test() in put_task_struct(). If this task
			 * was already freed we can not miss the preceding update of this
			 * pointer.
			 */
			smp_rmb();
			if (unlikely(task != READ_ONCE(*ptask)))
				goto retry;

			/*
			 * We've re-checked that "task == *ptask", now we have two different
			 * cases:
			 *
			 * 1. This is actually the same task/task_struct. In this case
			 *    sighand != NULL tells us it is still alive.
			 *
			 * 2. This is another task which got the same memory for task_struct.
			 *    We can't know this of course, and we can not trust
			 *    sighand != NULL.
			 *
			 *    In this case we actually return a random value, but this is
			 *    correct.
			 *
			 *    If we return NULL - we can pretend that we actually noticed that
			 *    *ptask was updated when the previous task has exited. Or pretend
			 *    that probe_kernel_address(&task->sighand) reads NULL.
			 *
			 *    If we return the new task (because sighand is not NULL for any
			 *    reason) - this is fine too. This (new) task can't go away before
			 *    another gp pass.
			 *
			 *    And note: We could even eliminate the false positive if we
			 *    re-read task->sighand once again to avoid the falsely NULL.
			 *    But this case is very unlikely so we don't care.
			 */
			if (!sighand)
				return NULL;

			return task;
		}

		/*
		 * try_get_task_struct - take a reference on the task published at *ptask.
		 * @ptask: location of an RCU-published task_struct pointer.
		 *
		 * Looks the task up with task_rcu_dereference() under rcu_read_lock() and,
		 * if one is found, pins it with get_task_struct() before leaving the RCU
		 * read-side critical section.
		 *
		 * Returns the referenced task, or NULL when task_rcu_dereference() found
		 * none. No reference is taken in the NULL case.
		 */
		struct task_struct *try_get_task_struct(struct task_struct **ptask)
		{
			struct task_struct *task;

			rcu_read_lock();
			task = task_rcu_dereference(ptask);
			if (task)
				get_task_struct(task);
			rcu_read_unlock();

			return task;
		}

		/*
		* Determine if a process group is "orphaned", according to the POSIX
		* definition in 2.2.2.52. Orphaned process groups are not to be affected

kernel/sched/core.c

+81 −33

Original line number	Diff line number	Diff line
		@@ -2342,11 +2342,11 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)

		__sched_fork(clone_flags, p);
		/*
		* We mark the process as running here. This guarantees that
		* We mark the process as NEW here. This guarantees that
		* nobody will actually run it, and a signal or other external
		* event cannot wake it up and insert it on the runqueue either.
		*/
		p->state = TASK_RUNNING;
		p->state = TASK_NEW;

		/*
		* Make sure we do not leak PI boosting priority to the child.
		@@ -2383,8 +2383,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
		p->sched_class = &fair_sched_class;
		}

		if (p->sched_class->task_fork)
		p->sched_class->task_fork(p);
		init_entity_runnable_average(&p->se);

		/*
		* The child is not yet in the pid-hash so no cgroup attach races,
		@@ -2394,7 +2393,13 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
		* Silence PROVE_RCU.
		*/
		raw_spin_lock_irqsave(&p->pi_lock, flags);
		set_task_cpu(p, cpu);
		/*
		* We're setting the cpu for the first time, we don't migrate,
		* so use __set_task_cpu().
		*/
		__set_task_cpu(p, cpu);
		if (p->sched_class->task_fork)
		p->sched_class->task_fork(p);
		raw_spin_unlock_irqrestore(&p->pi_lock, flags);

		#ifdef CONFIG_SCHED_INFO
		@@ -2526,16 +2531,18 @@ void wake_up_new_task(struct task_struct *p)
		struct rq_flags rf;
		struct rq *rq;

		/* Initialize new task's runnable average */
		init_entity_runnable_average(&p->se);
		raw_spin_lock_irqsave(&p->pi_lock, rf.flags);
		p->state = TASK_RUNNING;
		#ifdef CONFIG_SMP
		/*
		* Fork balancing, do it here and not earlier because:
		* - cpus_allowed can change in the fork path
		* - any previously selected cpu might disappear through hotplug
		*
		* Use __set_task_cpu() to avoid calling sched_class::migrate_task_rq,
		* as we're not fully set-up yet.
		*/
		set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0));
		__set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0));
		#endif
		rq = __task_rq_lock(p, &rf);
		post_init_entity_util_avg(&p->se);
		@@ -3161,6 +3168,9 @@ static noinline void __schedule_bug(struct task_struct *prev)
		pr_cont("\n");
		}
		#endif
		if (panic_on_warn)
		panic("scheduling while atomic\n");

		dump_stack();
		add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
		}
		@@ -4752,7 +4762,8 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask)
		* @len: length in bytes of the bitmask pointed to by user_mask_ptr
		* @user_mask_ptr: user-space pointer to hold the current cpu mask
		*
		* Return: 0 on success. An error code otherwise.
		* Return: size of CPU mask copied to user_mask_ptr on success. An
		* error code otherwise.
		*/
		SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len,
		unsigned long __user *, user_mask_ptr)
		@@ -7231,7 +7242,6 @@ static void sched_rq_cpu_starting(unsigned int cpu)
		struct rq *rq = cpu_rq(cpu);

		rq->calc_load_update = calc_load_update;
		account_reset_rq(rq);
		update_max_interval();
		}

		@@ -7711,6 +7721,8 @@ void sched_online_group(struct task_group *tg, struct task_group *parent)
		INIT_LIST_HEAD(&tg->children);
		list_add_rcu(&tg->siblings, &parent->children);
		spin_unlock_irqrestore(&task_group_lock, flags);

		online_fair_sched_group(tg);
		}

		/* rcu callback to free various structures associated with a task group */
		@@ -7739,27 +7751,9 @@ void sched_offline_group(struct task_group *tg)
		spin_unlock_irqrestore(&task_group_lock, flags);
		}

		/* change task's runqueue when it moves between groups.
		* The caller of this function should have put the task in its new group
		* by now. This function just updates tsk->se.cfs_rq and tsk->se.parent to
		* reflect its new group.
		*/
		void sched_move_task(struct task_struct *tsk)
		static void sched_change_group(struct task_struct *tsk, int type)
		{
		struct task_group *tg;
		int queued, running;
		struct rq_flags rf;
		struct rq *rq;

		rq = task_rq_lock(tsk, &rf);

		running = task_current(rq, tsk);
		queued = task_on_rq_queued(tsk);

		if (queued)
		dequeue_task(rq, tsk, DEQUEUE_SAVE | DEQUEUE_MOVE);
		if (unlikely(running))
		put_prev_task(rq, tsk);

		/*
		* All callers are synchronized by task_rq_lock(); we do not use RCU
		@@ -7772,11 +7766,37 @@ void sched_move_task(struct task_struct *tsk)
		tsk->sched_task_group = tg;

		#ifdef CONFIG_FAIR_GROUP_SCHED
		if (tsk->sched_class->task_move_group)
		tsk->sched_class->task_move_group(tsk);
		if (tsk->sched_class->task_change_group)
		tsk->sched_class->task_change_group(tsk, type);
		else
		#endif
		set_task_rq(tsk, task_cpu(tsk));
		}

		/*
		* Change task's runqueue when it moves between groups.
		*
		* The caller of this function should have put the task in its new group by
		* now. This function just updates tsk->se.cfs_rq and tsk->se.parent to reflect
		* its new group.
		*/
		void sched_move_task(struct task_struct *tsk)
		{
		int queued, running;
		struct rq_flags rf;
		struct rq *rq;

		rq = task_rq_lock(tsk, &rf);

		running = task_current(rq, tsk);
		queued = task_on_rq_queued(tsk);

		if (queued)
		dequeue_task(rq, tsk, DEQUEUE_SAVE | DEQUEUE_MOVE);
		if (unlikely(running))
		put_prev_task(rq, tsk);

		sched_change_group(tsk, TASK_MOVE_GROUP);

		if (unlikely(running))
		tsk->sched_class->set_curr_task(rq);
		@@ -8204,15 +8224,27 @@ static void cpu_cgroup_css_free(struct cgroup_subsys_state *css)
		sched_free_group(tg);
		}

		/*
		* This is called before wake_up_new_task(), therefore we really only
		* have to set its group bits, all the other stuff does not apply.
		*/
		static void cpu_cgroup_fork(struct task_struct *task)
		{
		sched_move_task(task);
		struct rq_flags rf;
		struct rq *rq;

		rq = task_rq_lock(task, &rf);

		sched_change_group(task, TASK_SET_GROUP);

		task_rq_unlock(rq, task, &rf);
		}

		static int cpu_cgroup_can_attach(struct cgroup_taskset *tset)
		{
		struct task_struct *task;
		struct cgroup_subsys_state *css;
		int ret = 0;

		cgroup_taskset_for_each(task, css, tset) {
		#ifdef CONFIG_RT_GROUP_SCHED
		@@ -8223,8 +8255,24 @@ static int cpu_cgroup_can_attach(struct cgroup_taskset *tset)
		if (task->sched_class != &fair_sched_class)
		return -EINVAL;
		#endif
		/*
		* Serialize against wake_up_new_task() such that if its
		* running, we're sure to observe its full state.
		*/
		raw_spin_lock_irq(&task->pi_lock);
		/*
		* Avoid calling sched_move_task() before wake_up_new_task()
		* has happened. This would lead to problems with PELT, due to
		* move wanting to detach+attach while we're not attached yet.
		*/
		if (task->state == TASK_NEW)
		ret = -EINVAL;
		raw_spin_unlock_irq(&task->pi_lock);

		if (ret)
		break;
		}
		return 0;
		return ret;
		}

		static void cpu_cgroup_attach(struct cgroup_taskset *tset)

kernel/sched/cpuacct.c

+73 −41

Original line number	Diff line number	Diff line
		@@ -25,15 +25,13 @@ enum cpuacct_stat_index {
		CPUACCT_STAT_NSTATS,
		};

		enum cpuacct_usage_index {
		CPUACCT_USAGE_USER, /* ... user mode */
		CPUACCT_USAGE_SYSTEM, /* ... kernel mode */

		CPUACCT_USAGE_NRUSAGE,
		static const char * const cpuacct_stat_desc[] = {
		[CPUACCT_STAT_USER] = "user",
		[CPUACCT_STAT_SYSTEM] = "system",
		};

		struct cpuacct_usage {
		u64 usages[CPUACCT_USAGE_NRUSAGE];
		u64 usages[CPUACCT_STAT_NSTATS];
		};

		/* track cpu usage of a group of tasks and its child groups */
		@@ -108,16 +106,16 @@ static void cpuacct_css_free(struct cgroup_subsys_state *css)
		}

		static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu,
		enum cpuacct_usage_index index)
		enum cpuacct_stat_index index)
		{
		struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
		u64 data;

		/*
		* We allow index == CPUACCT_USAGE_NRUSAGE here to read
		* We allow index == CPUACCT_STAT_NSTATS here to read
		* the sum of usages.
		*/
		BUG_ON(index > CPUACCT_USAGE_NRUSAGE);
		BUG_ON(index > CPUACCT_STAT_NSTATS);

		#ifndef CONFIG_64BIT
		/*
		@@ -126,11 +124,11 @@ static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu,
		raw_spin_lock_irq(&cpu_rq(cpu)->lock);
		#endif

		if (index == CPUACCT_USAGE_NRUSAGE) {
		if (index == CPUACCT_STAT_NSTATS) {
		int i = 0;

		data = 0;
		for (i = 0; i < CPUACCT_USAGE_NRUSAGE; i++)
		for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
		data += cpuusage->usages[i];
		} else {
		data = cpuusage->usages[index];
		@@ -155,7 +153,7 @@ static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
		raw_spin_lock_irq(&cpu_rq(cpu)->lock);
		#endif

		for (i = 0; i < CPUACCT_USAGE_NRUSAGE; i++)
		for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
		cpuusage->usages[i] = val;

		#ifndef CONFIG_64BIT
		@@ -165,7 +163,7 @@ static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)

		/* return total cpu usage (in nanoseconds) of a group */
		static u64 __cpuusage_read(struct cgroup_subsys_state *css,
		enum cpuacct_usage_index index)
		enum cpuacct_stat_index index)
		{
		struct cpuacct *ca = css_ca(css);
		u64 totalcpuusage = 0;
		@@ -180,18 +178,18 @@ static u64 __cpuusage_read(struct cgroup_subsys_state *css,
		static u64 cpuusage_user_read(struct cgroup_subsys_state *css,
		struct cftype *cft)
		{
		return __cpuusage_read(css, CPUACCT_USAGE_USER);
		return __cpuusage_read(css, CPUACCT_STAT_USER);
		}

		static u64 cpuusage_sys_read(struct cgroup_subsys_state *css,
		struct cftype *cft)
		{
		return __cpuusage_read(css, CPUACCT_USAGE_SYSTEM);
		return __cpuusage_read(css, CPUACCT_STAT_SYSTEM);
		}

		static u64 cpuusage_read(struct cgroup_subsys_state *css, struct cftype *cft)
		{
		return __cpuusage_read(css, CPUACCT_USAGE_NRUSAGE);
		return __cpuusage_read(css, CPUACCT_STAT_NSTATS);
		}

		static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft,
		@@ -213,7 +211,7 @@ static int cpuusage_write(struct cgroup_subsys_state css, struct cftype cft,
		}

		static int __cpuacct_percpu_seq_show(struct seq_file *m,
		enum cpuacct_usage_index index)
		enum cpuacct_stat_index index)
		{
		struct cpuacct *ca = css_ca(seq_css(m));
		u64 percpu;
		@@ -229,48 +227,78 @@ static int __cpuacct_percpu_seq_show(struct seq_file *m,

		static int cpuacct_percpu_user_seq_show(struct seq_file *m, void *V)
		{
		return __cpuacct_percpu_seq_show(m, CPUACCT_USAGE_USER);
		return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_USER);
		}

		static int cpuacct_percpu_sys_seq_show(struct seq_file *m, void *V)
		{
		return __cpuacct_percpu_seq_show(m, CPUACCT_USAGE_SYSTEM);
		return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_SYSTEM);
		}

		static int cpuacct_percpu_seq_show(struct seq_file *m, void *V)
		{
		return __cpuacct_percpu_seq_show(m, CPUACCT_USAGE_NRUSAGE);
		return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_NSTATS);
		}

		static const char * const cpuacct_stat_desc[] = {
		[CPUACCT_STAT_USER] = "user",
		[CPUACCT_STAT_SYSTEM] = "system",
		};
		/*
		 * cpuacct_all_seq_show - seq_file handler behind the "usage_all" file.
		 * @m: seq_file being filled; also identifies the cpuacct group via seq_css().
		 * @V: unused.
		 *
		 * Emits a header line "cpu" followed by one column name per entry of
		 * cpuacct_stat_desc[], then one line per possible CPU holding the CPU
		 * number and that CPU's usages[] value for every column.
		 *
		 * Always returns 0.
		 */
		static int cpuacct_all_seq_show(struct seq_file *m, void *V)
		{
			struct cpuacct *ca = css_ca(seq_css(m));
			int index;
			int cpu;

			seq_puts(m, "cpu");
			for (index = 0; index < CPUACCT_STAT_NSTATS; index++)
				seq_printf(m, " %s", cpuacct_stat_desc[index]);
			seq_puts(m, "\n");

			for_each_possible_cpu(cpu) {
				struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);

				seq_printf(m, "%d", cpu);

				for (index = 0; index < CPUACCT_STAT_NSTATS; index++) {
					u64 usage;

		#ifndef CONFIG_64BIT
					/*
					 * Take rq->lock to make 64-bit read safe on 32-bit
					 * platforms. Only the load needs the lock; the
					 * formatting is done after it has been dropped.
					 */
					raw_spin_lock_irq(&cpu_rq(cpu)->lock);
		#endif

					usage = cpuusage->usages[index];

		#ifndef CONFIG_64BIT
					raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
		#endif

					seq_printf(m, " %llu", usage);
				}
				seq_puts(m, "\n");
			}
			return 0;
		}

		static int cpuacct_stats_show(struct seq_file *sf, void *v)
		{
		struct cpuacct *ca = css_ca(seq_css(sf));
		s64 val[CPUACCT_STAT_NSTATS];
		int cpu;
		s64 val = 0;
		int stat;

		memset(val, 0, sizeof(val));
		for_each_possible_cpu(cpu) {
		struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu);
		val += kcpustat->cpustat[CPUTIME_USER];
		val += kcpustat->cpustat[CPUTIME_NICE];
		}
		val = cputime64_to_clock_t(val);
		seq_printf(sf, "%s %lld\n", cpuacct_stat_desc[CPUACCT_STAT_USER], val);
		u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat;

		val = 0;
		for_each_possible_cpu(cpu) {
		struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu);
		val += kcpustat->cpustat[CPUTIME_SYSTEM];
		val += kcpustat->cpustat[CPUTIME_IRQ];
		val += kcpustat->cpustat[CPUTIME_SOFTIRQ];
		val[CPUACCT_STAT_USER] += cpustat[CPUTIME_USER];
		val[CPUACCT_STAT_USER] += cpustat[CPUTIME_NICE];
		val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SYSTEM];
		val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_IRQ];
		val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SOFTIRQ];
		}

		val = cputime64_to_clock_t(val);
		seq_printf(sf, "%s %lld\n", cpuacct_stat_desc[CPUACCT_STAT_SYSTEM], val);
		for (stat = 0; stat < CPUACCT_STAT_NSTATS; stat++) {
		seq_printf(sf, "%s %lld\n",
		cpuacct_stat_desc[stat],
		cputime64_to_clock_t(val[stat]));
		}

		return 0;
		}
		@@ -301,6 +329,10 @@ static struct cftype files[] = {
		.name = "usage_percpu_sys",
		.seq_show = cpuacct_percpu_sys_seq_show,
		},
		{
		.name = "usage_all",
		.seq_show = cpuacct_all_seq_show,
		},
		{
		.name = "stat",
		.seq_show = cpuacct_stats_show,
		@@ -316,11 +348,11 @@ static struct cftype files[] = {
		void cpuacct_charge(struct task_struct *tsk, u64 cputime)
		{
		struct cpuacct *ca;
		int index = CPUACCT_USAGE_SYSTEM;
		int index = CPUACCT_STAT_SYSTEM;
		struct pt_regs *regs = task_pt_regs(tsk);

		if (regs && user_mode(regs))
		index = CPUACCT_USAGE_USER;
		index = CPUACCT_STAT_USER;

		rcu_read_lock();