
Commit be53f58f authored by Linus Torvalds

Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler fixes from Ingo Molnar:
 "Misc fixes: a cgroup fix, a fair-scheduler migration accounting fix, a
  cputime fix and two cpuacct cleanups"

* 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/cpuacct: Simplify the cpuacct code
  sched/cpuacct: Rename parameter in cpuusage_write() for readability
  sched/fair: Add comments to explain select_idle_sibling()
  sched/fair: Fix fairness issue on migration
  sched/cgroup: Fix/cleanup cgroup teardown/init
  sched/cputime: Fix steal time accounting vs. CPU hotplug
parents 19d6f04c 73e6aafd
kernel/sched/core.c  +15 −21
@@ -5371,6 +5371,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 
 	case CPU_UP_PREPARE:
 		rq->calc_load_update = calc_load_update;
+		account_reset_rq(rq);
 		break;
 
 	case CPU_ONLINE:
@@ -7537,7 +7538,7 @@ void set_curr_task(int cpu, struct task_struct *p)
 /* task_group_lock serializes the addition/removal of task groups */
 static DEFINE_SPINLOCK(task_group_lock);
 
-static void free_sched_group(struct task_group *tg)
+static void sched_free_group(struct task_group *tg)
 {
 	free_fair_sched_group(tg);
 	free_rt_sched_group(tg);
@@ -7563,7 +7564,7 @@ struct task_group *sched_create_group(struct task_group *parent)
 	return tg;
 
 err:
-	free_sched_group(tg);
+	sched_free_group(tg);
 	return ERR_PTR(-ENOMEM);
 }

@@ -7583,17 +7584,16 @@ void sched_online_group(struct task_group *tg, struct task_group *parent)
 }
 
 /* rcu callback to free various structures associated with a task group */
-static void free_sched_group_rcu(struct rcu_head *rhp)
+static void sched_free_group_rcu(struct rcu_head *rhp)
 {
 	/* now it should be safe to free those cfs_rqs */
-	free_sched_group(container_of(rhp, struct task_group, rcu));
+	sched_free_group(container_of(rhp, struct task_group, rcu));
 }
 
-/* Destroy runqueue etc associated with a task group */
 void sched_destroy_group(struct task_group *tg)
 {
 	/* wait for possible concurrent references to cfs_rqs complete */
-	call_rcu(&tg->rcu, free_sched_group_rcu);
+	call_rcu(&tg->rcu, sched_free_group_rcu);
 }
 
 void sched_offline_group(struct task_group *tg)
@@ -8052,31 +8052,26 @@ cpu_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 	if (IS_ERR(tg))
 		return ERR_PTR(-ENOMEM);
 
+	sched_online_group(tg, parent);
+
 	return &tg->css;
 }
 
-static int cpu_cgroup_css_online(struct cgroup_subsys_state *css)
+static void cpu_cgroup_css_released(struct cgroup_subsys_state *css)
 {
 	struct task_group *tg = css_tg(css);
-	struct task_group *parent = css_tg(css->parent);
 
-	if (parent)
-		sched_online_group(tg, parent);
-	return 0;
+	sched_offline_group(tg);
 }
 
 static void cpu_cgroup_css_free(struct cgroup_subsys_state *css)
 {
 	struct task_group *tg = css_tg(css);
 
-	sched_destroy_group(tg);
-}
-
-static void cpu_cgroup_css_offline(struct cgroup_subsys_state *css)
-{
-	struct task_group *tg = css_tg(css);
-
-	sched_offline_group(tg);
+	/*
+	 * Relies on the RCU grace period between css_released() and this.
+	 */
+	sched_free_group(tg);
 }
 
 static void cpu_cgroup_fork(struct task_struct *task)
@@ -8436,9 +8431,8 @@ static struct cftype cpu_files[] = {
 
 struct cgroup_subsys cpu_cgrp_subsys = {
 	.css_alloc	= cpu_cgroup_css_alloc,
+	.css_released	= cpu_cgroup_css_released,
 	.css_free	= cpu_cgroup_css_free,
-	.css_online	= cpu_cgroup_css_online,
-	.css_offline	= cpu_cgroup_css_offline,
 	.fork		= cpu_cgroup_fork,
 	.can_attach	= cpu_cgroup_can_attach,
 	.attach		= cpu_cgroup_attach,
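
Taken together, the core.c changes move task-group setup into css_alloc() and
teardown into the new css_released() callback, leaving css_free() to do the
actual freeing under the guarantee that cgroup core inserts an RCU grace
period between release and free. A minimal sketch of that ordering (plain
userspace C; the names and output are illustrative stand-ins, not kernel code):

#include <stdio.h>

/* Stand-ins for the callbacks wired up in cpu_cgrp_subsys. */
static void css_alloc_cb(void)    { puts("css_alloc: sched_online_group()"); }
static void css_released_cb(void) { puts("css_released: sched_offline_group()"); }
static void css_free_cb(void)     { puts("css_free: sched_free_group()"); }

int main(void)
{
	css_alloc_cb();    /* group is visible to the scheduler right away */
	css_released_cb(); /* unlinked on release, not on offline */
	/* ...cgroup core guarantees an RCU grace period here... */
	css_free_cb();     /* safe to free: no RCU readers can remain */
	return 0;
}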
kernel/sched/cpuacct.c  +10 −25
@@ -145,13 +145,16 @@ static u64 cpuusage_read(struct cgroup_subsys_state *css, struct cftype *cft)
 }
 
 static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft,
-			  u64 reset)
+			  u64 val)
 {
 	struct cpuacct *ca = css_ca(css);
 	int err = 0;
 	int i;
 
-	if (reset) {
+	/*
+	 * Only allow '0' here to do a reset.
+	 */
+	if (val) {
 		err = -EINVAL;
 		goto out;
 	}
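
For context, cpuacct.usage only accepts a reset: writing "0" zeroes the
per-cpu counters and any other value fails with -EINVAL, which the rename
from 'reset' to 'val' makes easier to see at the call site. A hedged
userspace illustration; the cgroup v1 mount path below is an assumption
about the local setup:

#include <stdio.h>

int main(void)
{
	/* Assumes a cgroup v1 cpuacct hierarchy mounted at this path. */
	const char *path = "/sys/fs/cgroup/cpuacct/cpuacct.usage";
	FILE *f = fopen(path, "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	/* "0" resets the counters; any non-zero string would make the
	 * kernel's cpuusage_write() reject the write with -EINVAL. */
	if (fputs("0", f) == EOF)
		perror("fputs");
	fclose(f);
	return 0;
}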
@@ -235,23 +238,10 @@ static struct cftype files[] = {
 void cpuacct_charge(struct task_struct *tsk, u64 cputime)
 {
 	struct cpuacct *ca;
-	int cpu;
-
-	cpu = task_cpu(tsk);
 
 	rcu_read_lock();
 
-	ca = task_ca(tsk);
-
-	while (true) {
-		u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
-		*cpuusage += cputime;
-
-		ca = parent_ca(ca);
-		if (!ca)
-			break;
-	}
-
+	for (ca = task_ca(tsk); ca; ca = parent_ca(ca))
+		*this_cpu_ptr(ca->cpuusage) += cputime;
 	rcu_read_unlock();
 }

@@ -260,18 +250,13 @@ void cpuacct_charge(struct task_struct *tsk, u64 cputime)
  *
  * Note: it's the caller that updates the account of the root cgroup.
  */
-void cpuacct_account_field(struct task_struct *p, int index, u64 val)
+void cpuacct_account_field(struct task_struct *tsk, int index, u64 val)
 {
-	struct kernel_cpustat *kcpustat;
 	struct cpuacct *ca;
 
 	rcu_read_lock();
-	ca = task_ca(p);
-	while (ca != &root_cpuacct) {
-		kcpustat = this_cpu_ptr(ca->cpustat);
-		kcpustat->cpustat[index] += val;
-		ca = parent_ca(ca);
-	}
+	for (ca = task_ca(tsk); ca != &root_cpuacct; ca = parent_ca(ca))
+		this_cpu_ptr(ca->cpustat)->cpustat[index] += val;
 	rcu_read_unlock();
 }
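
Both rewritten loops walk the cpuacct hierarchy from the task's group toward
the root, bumping one per-cpu counter per level (cpuacct_account_field stops
before &root_cpuacct because the caller accounts the root). A minimal
userspace model of that parent walk; the struct and function names here are
illustrative, not kernel API:

#include <stdio.h>

/* Toy stand-in for struct cpuacct: a usage counter plus a parent link. */
struct group {
	const char   *name;
	unsigned long usage;
	struct group *parent;
};

/* Same shape as the new cpuacct_charge() loop: charge every level from
 * the task's group up to the root (parent == NULL ends the walk). */
static void charge(struct group *g, unsigned long cputime)
{
	for (; g; g = g->parent)
		g->usage += cputime;
}

int main(void)
{
	struct group root  = { "root",  0, NULL  };
	struct group child = { "child", 0, &root };

	charge(&child, 100); /* both the child and the root see the charge */
	printf("%s=%lu %s=%lu\n", child.name, child.usage, root.name, root.usage);
	return 0;
}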

kernel/sched/cpuacct.h  +2 −2
@@ -1,7 +1,7 @@
 #ifdef CONFIG_CGROUP_CPUACCT
 
 extern void cpuacct_charge(struct task_struct *tsk, u64 cputime);
-extern void cpuacct_account_field(struct task_struct *p, int index, u64 val);
+extern void cpuacct_account_field(struct task_struct *tsk, int index, u64 val);
 
 #else

@@ -10,7 +10,7 @@ static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime)
 }
 
 static inline void
-cpuacct_account_field(struct task_struct *p, int index, u64 val)
+cpuacct_account_field(struct task_struct *tsk, int index, u64 val)
 {
 }

kernel/sched/fair.c  +32 −7
@@ -3181,17 +3181,25 @@ static inline void check_schedstat_required(void)
 static void
 enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
+	bool renorm = !(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_WAKING);
+	bool curr = cfs_rq->curr == se;
+
 	/*
-	 * Update the normalized vruntime before updating min_vruntime
-	 * through calling update_curr().
+	 * If we're the current task, we must renormalise before calling
+	 * update_curr().
 	 */
-	if (!(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_WAKING))
+	if (renorm && curr)
 		se->vruntime += cfs_rq->min_vruntime;
 
+	update_curr(cfs_rq);
+
 	/*
-	 * Update run-time statistics of the 'current'.
+	 * Otherwise, renormalise after, such that we're placed at the current
+	 * moment in time, instead of some random moment in the past.
 	 */
-	update_curr(cfs_rq);
+	if (renorm && !curr)
+		se->vruntime += cfs_rq->min_vruntime;
 
 	enqueue_entity_load_avg(cfs_rq, se);
 	account_entity_enqueue(cfs_rq, se);
 	update_cfs_shares(cfs_rq);
@@ -3207,7 +3215,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 		update_stats_enqueue(cfs_rq, se);
 		check_spread(cfs_rq, se);
 	}
-	if (se != cfs_rq->curr)
+	if (!curr)
 		__enqueue_entity(cfs_rq, se);
 	se->on_rq = 1;
@@ -5071,7 +5079,19 @@ static int select_idle_sibling(struct task_struct *p, int target)
 		return i;
 
 	/*
-	 * Otherwise, iterate the domains and find an elegible idle cpu.
+	 * Otherwise, iterate the domains and find an eligible idle cpu.
+	 *
+	 * A completely idle sched group at higher domains is more
+	 * desirable than an idle group at a lower level, because lower
+	 * domains have smaller groups and usually share hardware
+	 * resources which causes tasks to contend on them, e.g. x86
+	 * hyperthread siblings in the lowest domain (SMT) can contend
+	 * on the shared cpu pipeline.
+	 *
+	 * However, while we prefer idle groups at higher domains
+	 * finding an idle cpu at the lowest domain is still better than
+	 * returning 'target', which we've already established, isn't
+	 * idle.
 	 */
 	sd = rcu_dereference(per_cpu(sd_llc, target));
 	for_each_lower_domain(sd) {
@@ -5081,11 +5101,16 @@ static int select_idle_sibling(struct task_struct *p, int target)
 						tsk_cpus_allowed(p)))
 				goto next;
 
+			/* Ensure the entire group is idle */
 			for_each_cpu(i, sched_group_cpus(sg)) {
 				if (i == target || !idle_cpu(i))
 					goto next;
 			}
 
+			/*
+			 * It doesn't matter which cpu we pick, the
+			 * whole group is idle.
+			 */
 			target = cpumask_first_and(sched_group_cpus(sg),
 					tsk_cpus_allowed(p));
 			goto done;
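
The loop those comments annotate accepts a sched group only when every cpu in
it is idle, then picks any allowed cpu from it. A compact userspace model of
that check, with plain arrays standing in for cpumasks and sched groups
(illustrative only):

#include <stdbool.h>
#include <stdio.h>

/* Toy stand-in for idle_cpu(): true means the cpu is idle. */
static const bool idle[8] = { false, true, true, false, true, true, true, true };

/* A group qualifies only if every cpu in it is idle and none of them is
 * 'target' itself, mirroring the diff's inner loop. */
static int first_fully_idle_group(const int groups[][2], int ngroups, int target)
{
	for (int g = 0; g < ngroups; g++) {
		bool all_idle = true;

		for (int i = 0; i < 2; i++) {
			int cpu = groups[g][i];

			if (cpu == target || !idle[cpu])
				all_idle = false;
		}
		if (all_idle)
			return groups[g][0]; /* any cpu of an idle group will do */
	}
	return target; /* nothing beats the non-idle target */
}

int main(void)
{
	/* Pairs model SMT siblings that share a pipeline. */
	const int groups[4][2] = { {0, 1}, {2, 3}, {4, 5}, {6, 7} };

	printf("picked cpu %d\n", first_fully_idle_group(groups, 4, 0));
	return 0;
}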
kernel/sched/sched.h  +13 −0
@@ -1841,3 +1841,16 @@ static inline void cpufreq_trigger_update(u64 time)
 static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned long max) {}
 static inline void cpufreq_trigger_update(u64 time) {}
 #endif /* CONFIG_CPU_FREQ */
+
+static inline void account_reset_rq(struct rq *rq)
+{
+#ifdef CONFIG_IRQ_TIME_ACCOUNTING
+	rq->prev_irq_time = 0;
+#endif
+#ifdef CONFIG_PARAVIRT
+	rq->prev_steal_time = 0;
+#endif
+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
+	rq->prev_steal_time_rq = 0;
+#endif
+}
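
Why account_reset_rq() is needed: steal time is accounted as a delta against
a per-rq snapshot (in steal_account_process_tick()), and a snapshot that
survives an offline/online cycle can make the first unsigned subtraction
underflow into an absurd charge. A simplified userspace model of the failure
and of the fix; the variable name mirrors the diff, the rest is illustrative:

#include <stdio.h>

/* Simplified per-runqueue snapshot, like rq->prev_steal_time. */
static unsigned long long prev_steal_time = 1000; /* taken before the CPU went down */

/* Charge only the steal time accrued since the last snapshot. */
static unsigned long long steal_delta(unsigned long long cur)
{
	unsigned long long delta = cur - prev_steal_time;

	prev_steal_time = cur;
	return delta;
}

int main(void)
{
	/* After offline/online the paravirt steal clock may restart near
	 * zero; against the stale snapshot the subtraction underflows: */
	printf("stale snapshot: delta = %llu\n", steal_delta(50));

	/* account_reset_rq() zeroes the snapshot on CPU_UP_PREPARE, so
	 * the first post-hotplug delta stays sane: */
	prev_steal_time = 0;
	printf("after reset:    delta = %llu\n", steal_delta(50));
	return 0;
}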