Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit cefef3a7 authored by Ingo Molnar's avatar Ingo Molnar
Browse files

Merge branch 'sched/core' into timers/nohz, to avoid conflicts in upcoming patches



Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
parents 92d21ac7 748c7201
Loading
Loading
Loading
Loading
+0 −2
Original line number Diff line number Diff line
@@ -301,8 +301,6 @@ static void kvm_register_steal_time(void)
	if (!has_steal_clock)
		return;

	memset(st, 0, sizeof(*st));

	wrmsrl(MSR_KVM_STEAL_TIME, (slow_virt_to_phys(st) | KVM_MSR_ENABLED));
	pr_info("kvm-stealtime: cpu %d, msr %llx\n",
		cpu, (unsigned long long) slow_virt_to_phys(st));
+6 −2
Original line number Diff line number Diff line
@@ -219,9 +219,10 @@ extern void proc_sched_set_task(struct task_struct *p);
#define TASK_WAKING		256
#define TASK_PARKED		512
#define TASK_NOLOAD		1024
#define TASK_STATE_MAX		2048
#define TASK_NEW		2048
#define TASK_STATE_MAX		4096

#define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWPN"
#define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWPNn"

extern char ___assert_task_state[1 - 2*!!(
		sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)];
@@ -2139,6 +2140,9 @@ static inline void put_task_struct(struct task_struct *t)
		__put_task_struct(t);
}

struct task_struct *task_rcu_dereference(struct task_struct **ptask);
struct task_struct *try_get_task_struct(struct task_struct **ptask);

#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
extern void task_cputime(struct task_struct *t,
			 cputime_t *utime, cputime_t *stime);
+76 −0
Original line number Diff line number Diff line
@@ -210,6 +210,82 @@ void release_task(struct task_struct *p)
		goto repeat;
}

/*
 * Note that if this function returns a valid task_struct pointer (!NULL)
 * task->usage must remain >0 for the duration of the RCU critical section.
 */
struct task_struct *task_rcu_dereference(struct task_struct **ptask)
{
	struct sighand_struct *sighand;
	struct task_struct *task;

	/*
	 * We need to verify that release_task() was not called and thus
	 * delayed_put_task_struct() can't run and drop the last reference
	 * before rcu_read_unlock(). We check task->sighand != NULL,
	 * but we can read the already freed and reused memory.
	 */
retry:
	task = rcu_dereference(*ptask);
	if (!task)
		return NULL;

	probe_kernel_address(&task->sighand, sighand);

	/*
	 * Pairs with atomic_dec_and_test() in put_task_struct(). If this task
	 * was already freed we can not miss the preceding update of this
	 * pointer.
	 */
	smp_rmb();
	if (unlikely(task != READ_ONCE(*ptask)))
		goto retry;

	/*
	 * We've re-checked that "task == *ptask", now we have two different
	 * cases:
	 *
	 * 1. This is actually the same task/task_struct. In this case
	 *    sighand != NULL tells us it is still alive.
	 *
	 * 2. This is another task which got the same memory for task_struct.
	 *    We can't know this of course, and we can not trust
	 *    sighand != NULL.
	 *
	 *    In this case we actually return a random value, but this is
	 *    correct.
	 *
	 *    If we return NULL - we can pretend that we actually noticed that
	 *    *ptask was updated when the previous task has exited. Or pretend
	 *    that probe_slab_address(&sighand) reads NULL.
	 *
	 *    If we return the new task (because sighand is not NULL for any
	 *    reason) - this is fine too. This (new) task can't go away before
	 *    another gp pass.
	 *
	 *    And note: We could even eliminate the false positive if re-read
	 *    task->sighand once again to avoid the falsely NULL. But this case
	 *    is very unlikely so we don't care.
	 */
	if (!sighand)
		return NULL;

	return task;
}

struct task_struct *try_get_task_struct(struct task_struct **ptask)
{
	struct task_struct *task;

	rcu_read_lock();
	task = task_rcu_dereference(ptask);
	if (task)
		get_task_struct(task);
	rcu_read_unlock();

	return task;
}

/*
 * Determine if a process group is "orphaned", according to the POSIX
 * definition in 2.2.2.52.  Orphaned process groups are not to be affected
+81 −33
Original line number Diff line number Diff line
@@ -2342,11 +2342,11 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)

	__sched_fork(clone_flags, p);
	/*
	 * We mark the process as running here. This guarantees that
	 * We mark the process as NEW here. This guarantees that
	 * nobody will actually run it, and a signal or other external
	 * event cannot wake it up and insert it on the runqueue either.
	 */
	p->state = TASK_RUNNING;
	p->state = TASK_NEW;

	/*
	 * Make sure we do not leak PI boosting priority to the child.
@@ -2383,8 +2383,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
		p->sched_class = &fair_sched_class;
	}

	if (p->sched_class->task_fork)
		p->sched_class->task_fork(p);
	init_entity_runnable_average(&p->se);

	/*
	 * The child is not yet in the pid-hash so no cgroup attach races,
@@ -2394,7 +2393,13 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
	 * Silence PROVE_RCU.
	 */
	raw_spin_lock_irqsave(&p->pi_lock, flags);
	set_task_cpu(p, cpu);
	/*
	 * We're setting the cpu for the first time, we don't migrate,
	 * so use __set_task_cpu().
	 */
	__set_task_cpu(p, cpu);
	if (p->sched_class->task_fork)
		p->sched_class->task_fork(p);
	raw_spin_unlock_irqrestore(&p->pi_lock, flags);

#ifdef CONFIG_SCHED_INFO
@@ -2526,16 +2531,18 @@ void wake_up_new_task(struct task_struct *p)
	struct rq_flags rf;
	struct rq *rq;

	/* Initialize new task's runnable average */
	init_entity_runnable_average(&p->se);
	raw_spin_lock_irqsave(&p->pi_lock, rf.flags);
	p->state = TASK_RUNNING;
#ifdef CONFIG_SMP
	/*
	 * Fork balancing, do it here and not earlier because:
	 *  - cpus_allowed can change in the fork path
	 *  - any previously selected cpu might disappear through hotplug
	 *
	 * Use __set_task_cpu() to avoid calling sched_class::migrate_task_rq,
	 * as we're not fully set-up yet.
	 */
	set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0));
	__set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0));
#endif
	rq = __task_rq_lock(p, &rf);
	post_init_entity_util_avg(&p->se);
@@ -3161,6 +3168,9 @@ static noinline void __schedule_bug(struct task_struct *prev)
		pr_cont("\n");
	}
#endif
	if (panic_on_warn)
		panic("scheduling while atomic\n");

	dump_stack();
	add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
}
@@ -4752,7 +4762,8 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask)
 * @len: length in bytes of the bitmask pointed to by user_mask_ptr
 * @user_mask_ptr: user-space pointer to hold the current cpu mask
 *
 * Return: 0 on success. An error code otherwise.
 * Return: size of CPU mask copied to user_mask_ptr on success. An
 * error code otherwise.
 */
SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len,
		unsigned long __user *, user_mask_ptr)
@@ -7231,7 +7242,6 @@ static void sched_rq_cpu_starting(unsigned int cpu)
	struct rq *rq = cpu_rq(cpu);

	rq->calc_load_update = calc_load_update;
	account_reset_rq(rq);
	update_max_interval();
}

@@ -7711,6 +7721,8 @@ void sched_online_group(struct task_group *tg, struct task_group *parent)
	INIT_LIST_HEAD(&tg->children);
	list_add_rcu(&tg->siblings, &parent->children);
	spin_unlock_irqrestore(&task_group_lock, flags);

	online_fair_sched_group(tg);
}

/* rcu callback to free various structures associated with a task group */
@@ -7739,27 +7751,9 @@ void sched_offline_group(struct task_group *tg)
	spin_unlock_irqrestore(&task_group_lock, flags);
}

/* change task's runqueue when it moves between groups.
 *	The caller of this function should have put the task in its new group
 *	by now. This function just updates tsk->se.cfs_rq and tsk->se.parent to
 *	reflect its new group.
 */
void sched_move_task(struct task_struct *tsk)
static void sched_change_group(struct task_struct *tsk, int type)
{
	struct task_group *tg;
	int queued, running;
	struct rq_flags rf;
	struct rq *rq;

	rq = task_rq_lock(tsk, &rf);

	running = task_current(rq, tsk);
	queued = task_on_rq_queued(tsk);

	if (queued)
		dequeue_task(rq, tsk, DEQUEUE_SAVE | DEQUEUE_MOVE);
	if (unlikely(running))
		put_prev_task(rq, tsk);

	/*
	 * All callers are synchronized by task_rq_lock(); we do not use RCU
@@ -7772,11 +7766,37 @@ void sched_move_task(struct task_struct *tsk)
	tsk->sched_task_group = tg;

#ifdef CONFIG_FAIR_GROUP_SCHED
	if (tsk->sched_class->task_move_group)
		tsk->sched_class->task_move_group(tsk);
	if (tsk->sched_class->task_change_group)
		tsk->sched_class->task_change_group(tsk, type);
	else
#endif
		set_task_rq(tsk, task_cpu(tsk));
}

/*
 * Change task's runqueue when it moves between groups.
 *
 * The caller of this function should have put the task in its new group by
 * now. This function just updates tsk->se.cfs_rq and tsk->se.parent to reflect
 * its new group.
 */
void sched_move_task(struct task_struct *tsk)
{
	int queued, running;
	struct rq_flags rf;
	struct rq *rq;

	rq = task_rq_lock(tsk, &rf);

	running = task_current(rq, tsk);
	queued = task_on_rq_queued(tsk);

	if (queued)
		dequeue_task(rq, tsk, DEQUEUE_SAVE | DEQUEUE_MOVE);
	if (unlikely(running))
		put_prev_task(rq, tsk);

	sched_change_group(tsk, TASK_MOVE_GROUP);

	if (unlikely(running))
		tsk->sched_class->set_curr_task(rq);
@@ -8204,15 +8224,27 @@ static void cpu_cgroup_css_free(struct cgroup_subsys_state *css)
	sched_free_group(tg);
}

/*
 * This is called before wake_up_new_task(), therefore we really only
 * have to set its group bits, all the other stuff does not apply.
 */
static void cpu_cgroup_fork(struct task_struct *task)
{
	sched_move_task(task);
	struct rq_flags rf;
	struct rq *rq;

	rq = task_rq_lock(task, &rf);

	sched_change_group(task, TASK_SET_GROUP);

	task_rq_unlock(rq, task, &rf);
}

static int cpu_cgroup_can_attach(struct cgroup_taskset *tset)
{
	struct task_struct *task;
	struct cgroup_subsys_state *css;
	int ret = 0;

	cgroup_taskset_for_each(task, css, tset) {
#ifdef CONFIG_RT_GROUP_SCHED
@@ -8223,8 +8255,24 @@ static int cpu_cgroup_can_attach(struct cgroup_taskset *tset)
		if (task->sched_class != &fair_sched_class)
			return -EINVAL;
#endif
		/*
		 * Serialize against wake_up_new_task() such that if its
		 * running, we're sure to observe its full state.
		 */
		raw_spin_lock_irq(&task->pi_lock);
		/*
		 * Avoid calling sched_move_task() before wake_up_new_task()
		 * has happened. This would lead to problems with PELT, due to
		 * move wanting to detach+attach while we're not attached yet.
		 */
		if (task->state == TASK_NEW)
			ret = -EINVAL;
		raw_spin_unlock_irq(&task->pi_lock);

		if (ret)
			break;
	}
	return 0;
	return ret;
}

static void cpu_cgroup_attach(struct cgroup_taskset *tset)
+73 −41
Original line number Diff line number Diff line
@@ -25,15 +25,13 @@ enum cpuacct_stat_index {
	CPUACCT_STAT_NSTATS,
};

enum cpuacct_usage_index {
	CPUACCT_USAGE_USER,	/* ... user mode */
	CPUACCT_USAGE_SYSTEM,	/* ... kernel mode */

	CPUACCT_USAGE_NRUSAGE,
static const char * const cpuacct_stat_desc[] = {
	[CPUACCT_STAT_USER] = "user",
	[CPUACCT_STAT_SYSTEM] = "system",
};

struct cpuacct_usage {
	u64	usages[CPUACCT_USAGE_NRUSAGE];
	u64	usages[CPUACCT_STAT_NSTATS];
};

/* track cpu usage of a group of tasks and its child groups */
@@ -108,16 +106,16 @@ static void cpuacct_css_free(struct cgroup_subsys_state *css)
}

static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu,
				 enum cpuacct_usage_index index)
				 enum cpuacct_stat_index index)
{
	struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
	u64 data;

	/*
	 * We allow index == CPUACCT_USAGE_NRUSAGE here to read
	 * We allow index == CPUACCT_STAT_NSTATS here to read
	 * the sum of suages.
	 */
	BUG_ON(index > CPUACCT_USAGE_NRUSAGE);
	BUG_ON(index > CPUACCT_STAT_NSTATS);

#ifndef CONFIG_64BIT
	/*
@@ -126,11 +124,11 @@ static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu,
	raw_spin_lock_irq(&cpu_rq(cpu)->lock);
#endif

	if (index == CPUACCT_USAGE_NRUSAGE) {
	if (index == CPUACCT_STAT_NSTATS) {
		int i = 0;

		data = 0;
		for (i = 0; i < CPUACCT_USAGE_NRUSAGE; i++)
		for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
			data += cpuusage->usages[i];
	} else {
		data = cpuusage->usages[index];
@@ -155,7 +153,7 @@ static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
	raw_spin_lock_irq(&cpu_rq(cpu)->lock);
#endif

	for (i = 0; i < CPUACCT_USAGE_NRUSAGE; i++)
	for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
		cpuusage->usages[i] = val;

#ifndef CONFIG_64BIT
@@ -165,7 +163,7 @@ static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)

/* return total cpu usage (in nanoseconds) of a group */
static u64 __cpuusage_read(struct cgroup_subsys_state *css,
			   enum cpuacct_usage_index index)
			   enum cpuacct_stat_index index)
{
	struct cpuacct *ca = css_ca(css);
	u64 totalcpuusage = 0;
@@ -180,18 +178,18 @@ static u64 __cpuusage_read(struct cgroup_subsys_state *css,
static u64 cpuusage_user_read(struct cgroup_subsys_state *css,
			      struct cftype *cft)
{
	return __cpuusage_read(css, CPUACCT_USAGE_USER);
	return __cpuusage_read(css, CPUACCT_STAT_USER);
}

static u64 cpuusage_sys_read(struct cgroup_subsys_state *css,
			     struct cftype *cft)
{
	return __cpuusage_read(css, CPUACCT_USAGE_SYSTEM);
	return __cpuusage_read(css, CPUACCT_STAT_SYSTEM);
}

static u64 cpuusage_read(struct cgroup_subsys_state *css, struct cftype *cft)
{
	return __cpuusage_read(css, CPUACCT_USAGE_NRUSAGE);
	return __cpuusage_read(css, CPUACCT_STAT_NSTATS);
}

static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft,
@@ -213,7 +211,7 @@ static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft,
}

static int __cpuacct_percpu_seq_show(struct seq_file *m,
				     enum cpuacct_usage_index index)
				     enum cpuacct_stat_index index)
{
	struct cpuacct *ca = css_ca(seq_css(m));
	u64 percpu;
@@ -229,48 +227,78 @@ static int __cpuacct_percpu_seq_show(struct seq_file *m,

static int cpuacct_percpu_user_seq_show(struct seq_file *m, void *V)
{
	return __cpuacct_percpu_seq_show(m, CPUACCT_USAGE_USER);
	return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_USER);
}

static int cpuacct_percpu_sys_seq_show(struct seq_file *m, void *V)
{
	return __cpuacct_percpu_seq_show(m, CPUACCT_USAGE_SYSTEM);
	return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_SYSTEM);
}

static int cpuacct_percpu_seq_show(struct seq_file *m, void *V)
{
	return __cpuacct_percpu_seq_show(m, CPUACCT_USAGE_NRUSAGE);
	return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_NSTATS);
}

static const char * const cpuacct_stat_desc[] = {
	[CPUACCT_STAT_USER] = "user",
	[CPUACCT_STAT_SYSTEM] = "system",
};
static int cpuacct_all_seq_show(struct seq_file *m, void *V)
{
	struct cpuacct *ca = css_ca(seq_css(m));
	int index;
	int cpu;

	seq_puts(m, "cpu");
	for (index = 0; index < CPUACCT_STAT_NSTATS; index++)
		seq_printf(m, " %s", cpuacct_stat_desc[index]);
	seq_puts(m, "\n");

	for_each_possible_cpu(cpu) {
		struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);

		seq_printf(m, "%d", cpu);

		for (index = 0; index < CPUACCT_STAT_NSTATS; index++) {
#ifndef CONFIG_64BIT
			/*
			 * Take rq->lock to make 64-bit read safe on 32-bit
			 * platforms.
			 */
			raw_spin_lock_irq(&cpu_rq(cpu)->lock);
#endif

			seq_printf(m, " %llu", cpuusage->usages[index]);

#ifndef CONFIG_64BIT
			raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
#endif
		}
		seq_puts(m, "\n");
	}
	return 0;
}

static int cpuacct_stats_show(struct seq_file *sf, void *v)
{
	struct cpuacct *ca = css_ca(seq_css(sf));
	s64 val[CPUACCT_STAT_NSTATS];
	int cpu;
	s64 val = 0;
	int stat;

	memset(val, 0, sizeof(val));
	for_each_possible_cpu(cpu) {
		struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu);
		val += kcpustat->cpustat[CPUTIME_USER];
		val += kcpustat->cpustat[CPUTIME_NICE];
	}
	val = cputime64_to_clock_t(val);
	seq_printf(sf, "%s %lld\n", cpuacct_stat_desc[CPUACCT_STAT_USER], val);
		u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat;

	val = 0;
	for_each_possible_cpu(cpu) {
		struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu);
		val += kcpustat->cpustat[CPUTIME_SYSTEM];
		val += kcpustat->cpustat[CPUTIME_IRQ];
		val += kcpustat->cpustat[CPUTIME_SOFTIRQ];
		val[CPUACCT_STAT_USER]   += cpustat[CPUTIME_USER];
		val[CPUACCT_STAT_USER]   += cpustat[CPUTIME_NICE];
		val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SYSTEM];
		val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_IRQ];
		val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SOFTIRQ];
	}

	val = cputime64_to_clock_t(val);
	seq_printf(sf, "%s %lld\n", cpuacct_stat_desc[CPUACCT_STAT_SYSTEM], val);
	for (stat = 0; stat < CPUACCT_STAT_NSTATS; stat++) {
		seq_printf(sf, "%s %lld\n",
			   cpuacct_stat_desc[stat],
			   cputime64_to_clock_t(val[stat]));
	}

	return 0;
}
@@ -301,6 +329,10 @@ static struct cftype files[] = {
		.name = "usage_percpu_sys",
		.seq_show = cpuacct_percpu_sys_seq_show,
	},
	{
		.name = "usage_all",
		.seq_show = cpuacct_all_seq_show,
	},
	{
		.name = "stat",
		.seq_show = cpuacct_stats_show,
@@ -316,11 +348,11 @@ static struct cftype files[] = {
void cpuacct_charge(struct task_struct *tsk, u64 cputime)
{
	struct cpuacct *ca;
	int index = CPUACCT_USAGE_SYSTEM;
	int index = CPUACCT_STAT_SYSTEM;
	struct pt_regs *regs = task_pt_regs(tsk);

	if (regs && user_mode(regs))
		index = CPUACCT_USAGE_USER;
		index = CPUACCT_STAT_USER;

	rcu_read_lock();

Loading