Loading arch/x86/kernel/kvm.c +0 −2 Original line number Diff line number Diff line Loading @@ -301,8 +301,6 @@ static void kvm_register_steal_time(void) if (!has_steal_clock) return; memset(st, 0, sizeof(*st)); wrmsrl(MSR_KVM_STEAL_TIME, (slow_virt_to_phys(st) | KVM_MSR_ENABLED)); pr_info("kvm-stealtime: cpu %d, msr %llx\n", cpu, (unsigned long long) slow_virt_to_phys(st)); Loading include/linux/sched.h +6 −2 Original line number Diff line number Diff line Loading @@ -219,9 +219,10 @@ extern void proc_sched_set_task(struct task_struct *p); #define TASK_WAKING 256 #define TASK_PARKED 512 #define TASK_NOLOAD 1024 #define TASK_STATE_MAX 2048 #define TASK_NEW 2048 #define TASK_STATE_MAX 4096 #define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWPN" #define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWPNn" extern char ___assert_task_state[1 - 2*!!( sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)]; Loading Loading @@ -2139,6 +2140,9 @@ static inline void put_task_struct(struct task_struct *t) __put_task_struct(t); } struct task_struct *task_rcu_dereference(struct task_struct **ptask); struct task_struct *try_get_task_struct(struct task_struct **ptask); #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN extern void task_cputime(struct task_struct *t, cputime_t *utime, cputime_t *stime); Loading kernel/exit.c +76 −0 Original line number Diff line number Diff line Loading @@ -210,6 +210,82 @@ void release_task(struct task_struct *p) goto repeat; } /* * Note that if this function returns a valid task_struct pointer (!NULL) * task->usage must remain >0 for the duration of the RCU critical section. */ struct task_struct *task_rcu_dereference(struct task_struct **ptask) { struct sighand_struct *sighand; struct task_struct *task; /* * We need to verify that release_task() was not called and thus * delayed_put_task_struct() can't run and drop the last reference * before rcu_read_unlock(). We check task->sighand != NULL, * but we can read the already freed and reused memory. */ retry: task = rcu_dereference(*ptask); if (!task) return NULL; probe_kernel_address(&task->sighand, sighand); /* * Pairs with atomic_dec_and_test() in put_task_struct(). If this task * was already freed we can not miss the preceding update of this * pointer. */ smp_rmb(); if (unlikely(task != READ_ONCE(*ptask))) goto retry; /* * We've re-checked that "task == *ptask", now we have two different * cases: * * 1. This is actually the same task/task_struct. In this case * sighand != NULL tells us it is still alive. * * 2. This is another task which got the same memory for task_struct. * We can't know this of course, and we can not trust * sighand != NULL. * * In this case we actually return a random value, but this is * correct. * * If we return NULL - we can pretend that we actually noticed that * *ptask was updated when the previous task has exited. Or pretend * that probe_slab_address(&sighand) reads NULL. * * If we return the new task (because sighand is not NULL for any * reason) - this is fine too. This (new) task can't go away before * another gp pass. * * And note: We could even eliminate the false positive if re-read * task->sighand once again to avoid the falsely NULL. But this case * is very unlikely so we don't care. */ if (!sighand) return NULL; return task; } struct task_struct *try_get_task_struct(struct task_struct **ptask) { struct task_struct *task; rcu_read_lock(); task = task_rcu_dereference(ptask); if (task) get_task_struct(task); rcu_read_unlock(); return task; } /* * Determine if a process group is "orphaned", according to the POSIX * definition in 2.2.2.52. Orphaned process groups are not to be affected Loading kernel/sched/core.c +81 −33 Original line number Diff line number Diff line Loading @@ -2342,11 +2342,11 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) __sched_fork(clone_flags, p); /* * We mark the process as running here. This guarantees that * We mark the process as NEW here. This guarantees that * nobody will actually run it, and a signal or other external * event cannot wake it up and insert it on the runqueue either. */ p->state = TASK_RUNNING; p->state = TASK_NEW; /* * Make sure we do not leak PI boosting priority to the child. Loading Loading @@ -2383,8 +2383,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) p->sched_class = &fair_sched_class; } if (p->sched_class->task_fork) p->sched_class->task_fork(p); init_entity_runnable_average(&p->se); /* * The child is not yet in the pid-hash so no cgroup attach races, Loading @@ -2394,7 +2393,13 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) * Silence PROVE_RCU. */ raw_spin_lock_irqsave(&p->pi_lock, flags); set_task_cpu(p, cpu); /* * We're setting the cpu for the first time, we don't migrate, * so use __set_task_cpu(). */ __set_task_cpu(p, cpu); if (p->sched_class->task_fork) p->sched_class->task_fork(p); raw_spin_unlock_irqrestore(&p->pi_lock, flags); #ifdef CONFIG_SCHED_INFO Loading Loading @@ -2526,16 +2531,18 @@ void wake_up_new_task(struct task_struct *p) struct rq_flags rf; struct rq *rq; /* Initialize new task's runnable average */ init_entity_runnable_average(&p->se); raw_spin_lock_irqsave(&p->pi_lock, rf.flags); p->state = TASK_RUNNING; #ifdef CONFIG_SMP /* * Fork balancing, do it here and not earlier because: * - cpus_allowed can change in the fork path * - any previously selected cpu might disappear through hotplug * * Use __set_task_cpu() to avoid calling sched_class::migrate_task_rq, * as we're not fully set-up yet. */ set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0)); __set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0)); #endif rq = __task_rq_lock(p, &rf); post_init_entity_util_avg(&p->se); Loading Loading @@ -3161,6 +3168,9 @@ static noinline void __schedule_bug(struct task_struct *prev) pr_cont("\n"); } #endif if (panic_on_warn) panic("scheduling while atomic\n"); dump_stack(); add_taint(TAINT_WARN, LOCKDEP_STILL_OK); } Loading Loading @@ -4752,7 +4762,8 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask) * @len: length in bytes of the bitmask pointed to by user_mask_ptr * @user_mask_ptr: user-space pointer to hold the current cpu mask * * Return: 0 on success. An error code otherwise. * Return: size of CPU mask copied to user_mask_ptr on success. An * error code otherwise. */ SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len, unsigned long __user *, user_mask_ptr) Loading Loading @@ -7231,7 +7242,6 @@ static void sched_rq_cpu_starting(unsigned int cpu) struct rq *rq = cpu_rq(cpu); rq->calc_load_update = calc_load_update; account_reset_rq(rq); update_max_interval(); } Loading Loading @@ -7711,6 +7721,8 @@ void sched_online_group(struct task_group *tg, struct task_group *parent) INIT_LIST_HEAD(&tg->children); list_add_rcu(&tg->siblings, &parent->children); spin_unlock_irqrestore(&task_group_lock, flags); online_fair_sched_group(tg); } /* rcu callback to free various structures associated with a task group */ Loading Loading @@ -7739,27 +7751,9 @@ void sched_offline_group(struct task_group *tg) spin_unlock_irqrestore(&task_group_lock, flags); } /* change task's runqueue when it moves between groups. * The caller of this function should have put the task in its new group * by now. This function just updates tsk->se.cfs_rq and tsk->se.parent to * reflect its new group. */ void sched_move_task(struct task_struct *tsk) static void sched_change_group(struct task_struct *tsk, int type) { struct task_group *tg; int queued, running; struct rq_flags rf; struct rq *rq; rq = task_rq_lock(tsk, &rf); running = task_current(rq, tsk); queued = task_on_rq_queued(tsk); if (queued) dequeue_task(rq, tsk, DEQUEUE_SAVE | DEQUEUE_MOVE); if (unlikely(running)) put_prev_task(rq, tsk); /* * All callers are synchronized by task_rq_lock(); we do not use RCU Loading @@ -7772,11 +7766,37 @@ void sched_move_task(struct task_struct *tsk) tsk->sched_task_group = tg; #ifdef CONFIG_FAIR_GROUP_SCHED if (tsk->sched_class->task_move_group) tsk->sched_class->task_move_group(tsk); if (tsk->sched_class->task_change_group) tsk->sched_class->task_change_group(tsk, type); else #endif set_task_rq(tsk, task_cpu(tsk)); } /* * Change task's runqueue when it moves between groups. * * The caller of this function should have put the task in its new group by * now. This function just updates tsk->se.cfs_rq and tsk->se.parent to reflect * its new group. */ void sched_move_task(struct task_struct *tsk) { int queued, running; struct rq_flags rf; struct rq *rq; rq = task_rq_lock(tsk, &rf); running = task_current(rq, tsk); queued = task_on_rq_queued(tsk); if (queued) dequeue_task(rq, tsk, DEQUEUE_SAVE | DEQUEUE_MOVE); if (unlikely(running)) put_prev_task(rq, tsk); sched_change_group(tsk, TASK_MOVE_GROUP); if (unlikely(running)) tsk->sched_class->set_curr_task(rq); Loading Loading @@ -8204,15 +8224,27 @@ static void cpu_cgroup_css_free(struct cgroup_subsys_state *css) sched_free_group(tg); } /* * This is called before wake_up_new_task(), therefore we really only * have to set its group bits, all the other stuff does not apply. */ static void cpu_cgroup_fork(struct task_struct *task) { sched_move_task(task); struct rq_flags rf; struct rq *rq; rq = task_rq_lock(task, &rf); sched_change_group(task, TASK_SET_GROUP); task_rq_unlock(rq, task, &rf); } static int cpu_cgroup_can_attach(struct cgroup_taskset *tset) { struct task_struct *task; struct cgroup_subsys_state *css; int ret = 0; cgroup_taskset_for_each(task, css, tset) { #ifdef CONFIG_RT_GROUP_SCHED Loading @@ -8223,8 +8255,24 @@ static int cpu_cgroup_can_attach(struct cgroup_taskset *tset) if (task->sched_class != &fair_sched_class) return -EINVAL; #endif /* * Serialize against wake_up_new_task() such that if its * running, we're sure to observe its full state. */ raw_spin_lock_irq(&task->pi_lock); /* * Avoid calling sched_move_task() before wake_up_new_task() * has happened. This would lead to problems with PELT, due to * move wanting to detach+attach while we're not attached yet. */ if (task->state == TASK_NEW) ret = -EINVAL; raw_spin_unlock_irq(&task->pi_lock); if (ret) break; } return 0; return ret; } static void cpu_cgroup_attach(struct cgroup_taskset *tset) Loading kernel/sched/cpuacct.c +73 −41 Original line number Diff line number Diff line Loading @@ -25,15 +25,13 @@ enum cpuacct_stat_index { CPUACCT_STAT_NSTATS, }; enum cpuacct_usage_index { CPUACCT_USAGE_USER, /* ... user mode */ CPUACCT_USAGE_SYSTEM, /* ... kernel mode */ CPUACCT_USAGE_NRUSAGE, static const char * const cpuacct_stat_desc[] = { [CPUACCT_STAT_USER] = "user", [CPUACCT_STAT_SYSTEM] = "system", }; struct cpuacct_usage { u64 usages[CPUACCT_USAGE_NRUSAGE]; u64 usages[CPUACCT_STAT_NSTATS]; }; /* track cpu usage of a group of tasks and its child groups */ Loading Loading @@ -108,16 +106,16 @@ static void cpuacct_css_free(struct cgroup_subsys_state *css) } static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu, enum cpuacct_usage_index index) enum cpuacct_stat_index index) { struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); u64 data; /* * We allow index == CPUACCT_USAGE_NRUSAGE here to read * We allow index == CPUACCT_STAT_NSTATS here to read * the sum of suages. */ BUG_ON(index > CPUACCT_USAGE_NRUSAGE); BUG_ON(index > CPUACCT_STAT_NSTATS); #ifndef CONFIG_64BIT /* Loading @@ -126,11 +124,11 @@ static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu, raw_spin_lock_irq(&cpu_rq(cpu)->lock); #endif if (index == CPUACCT_USAGE_NRUSAGE) { if (index == CPUACCT_STAT_NSTATS) { int i = 0; data = 0; for (i = 0; i < CPUACCT_USAGE_NRUSAGE; i++) for (i = 0; i < CPUACCT_STAT_NSTATS; i++) data += cpuusage->usages[i]; } else { data = cpuusage->usages[index]; Loading @@ -155,7 +153,7 @@ static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val) raw_spin_lock_irq(&cpu_rq(cpu)->lock); #endif for (i = 0; i < CPUACCT_USAGE_NRUSAGE; i++) for (i = 0; i < CPUACCT_STAT_NSTATS; i++) cpuusage->usages[i] = val; #ifndef CONFIG_64BIT Loading @@ -165,7 +163,7 @@ static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val) /* return total cpu usage (in nanoseconds) of a group */ static u64 __cpuusage_read(struct cgroup_subsys_state *css, enum cpuacct_usage_index index) enum cpuacct_stat_index index) { struct cpuacct *ca = css_ca(css); u64 totalcpuusage = 0; Loading @@ -180,18 +178,18 @@ static u64 __cpuusage_read(struct cgroup_subsys_state *css, static u64 cpuusage_user_read(struct cgroup_subsys_state *css, struct cftype *cft) { return __cpuusage_read(css, CPUACCT_USAGE_USER); return __cpuusage_read(css, CPUACCT_STAT_USER); } static u64 cpuusage_sys_read(struct cgroup_subsys_state *css, struct cftype *cft) { return __cpuusage_read(css, CPUACCT_USAGE_SYSTEM); return __cpuusage_read(css, CPUACCT_STAT_SYSTEM); } static u64 cpuusage_read(struct cgroup_subsys_state *css, struct cftype *cft) { return __cpuusage_read(css, CPUACCT_USAGE_NRUSAGE); return __cpuusage_read(css, CPUACCT_STAT_NSTATS); } static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft, Loading @@ -213,7 +211,7 @@ static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft, } static int __cpuacct_percpu_seq_show(struct seq_file *m, enum cpuacct_usage_index index) enum cpuacct_stat_index index) { struct cpuacct *ca = css_ca(seq_css(m)); u64 percpu; Loading @@ -229,48 +227,78 @@ static int __cpuacct_percpu_seq_show(struct seq_file *m, static int cpuacct_percpu_user_seq_show(struct seq_file *m, void *V) { return __cpuacct_percpu_seq_show(m, CPUACCT_USAGE_USER); return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_USER); } static int cpuacct_percpu_sys_seq_show(struct seq_file *m, void *V) { return __cpuacct_percpu_seq_show(m, CPUACCT_USAGE_SYSTEM); return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_SYSTEM); } static int cpuacct_percpu_seq_show(struct seq_file *m, void *V) { return __cpuacct_percpu_seq_show(m, CPUACCT_USAGE_NRUSAGE); return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_NSTATS); } static const char * const cpuacct_stat_desc[] = { [CPUACCT_STAT_USER] = "user", [CPUACCT_STAT_SYSTEM] = "system", }; static int cpuacct_all_seq_show(struct seq_file *m, void *V) { struct cpuacct *ca = css_ca(seq_css(m)); int index; int cpu; seq_puts(m, "cpu"); for (index = 0; index < CPUACCT_STAT_NSTATS; index++) seq_printf(m, " %s", cpuacct_stat_desc[index]); seq_puts(m, "\n"); for_each_possible_cpu(cpu) { struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); seq_printf(m, "%d", cpu); for (index = 0; index < CPUACCT_STAT_NSTATS; index++) { #ifndef CONFIG_64BIT /* * Take rq->lock to make 64-bit read safe on 32-bit * platforms. */ raw_spin_lock_irq(&cpu_rq(cpu)->lock); #endif seq_printf(m, " %llu", cpuusage->usages[index]); #ifndef CONFIG_64BIT raw_spin_unlock_irq(&cpu_rq(cpu)->lock); #endif } seq_puts(m, "\n"); } return 0; } static int cpuacct_stats_show(struct seq_file *sf, void *v) { struct cpuacct *ca = css_ca(seq_css(sf)); s64 val[CPUACCT_STAT_NSTATS]; int cpu; s64 val = 0; int stat; memset(val, 0, sizeof(val)); for_each_possible_cpu(cpu) { struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu); val += kcpustat->cpustat[CPUTIME_USER]; val += kcpustat->cpustat[CPUTIME_NICE]; } val = cputime64_to_clock_t(val); seq_printf(sf, "%s %lld\n", cpuacct_stat_desc[CPUACCT_STAT_USER], val); u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat; val = 0; for_each_possible_cpu(cpu) { struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu); val += kcpustat->cpustat[CPUTIME_SYSTEM]; val += kcpustat->cpustat[CPUTIME_IRQ]; val += kcpustat->cpustat[CPUTIME_SOFTIRQ]; val[CPUACCT_STAT_USER] += cpustat[CPUTIME_USER]; val[CPUACCT_STAT_USER] += cpustat[CPUTIME_NICE]; val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SYSTEM]; val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_IRQ]; val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SOFTIRQ]; } val = cputime64_to_clock_t(val); seq_printf(sf, "%s %lld\n", cpuacct_stat_desc[CPUACCT_STAT_SYSTEM], val); for (stat = 0; stat < CPUACCT_STAT_NSTATS; stat++) { seq_printf(sf, "%s %lld\n", cpuacct_stat_desc[stat], cputime64_to_clock_t(val[stat])); } return 0; } Loading Loading @@ -301,6 +329,10 @@ static struct cftype files[] = { .name = "usage_percpu_sys", .seq_show = cpuacct_percpu_sys_seq_show, }, { .name = "usage_all", .seq_show = cpuacct_all_seq_show, }, { .name = "stat", .seq_show = cpuacct_stats_show, Loading @@ -316,11 +348,11 @@ static struct cftype files[] = { void cpuacct_charge(struct task_struct *tsk, u64 cputime) { struct cpuacct *ca; int index = CPUACCT_USAGE_SYSTEM; int index = CPUACCT_STAT_SYSTEM; struct pt_regs *regs = task_pt_regs(tsk); if (regs && user_mode(regs)) index = CPUACCT_USAGE_USER; index = CPUACCT_STAT_USER; rcu_read_lock(); Loading Loading
arch/x86/kernel/kvm.c +0 −2 Original line number Diff line number Diff line Loading @@ -301,8 +301,6 @@ static void kvm_register_steal_time(void) if (!has_steal_clock) return; memset(st, 0, sizeof(*st)); wrmsrl(MSR_KVM_STEAL_TIME, (slow_virt_to_phys(st) | KVM_MSR_ENABLED)); pr_info("kvm-stealtime: cpu %d, msr %llx\n", cpu, (unsigned long long) slow_virt_to_phys(st)); Loading
include/linux/sched.h +6 −2 Original line number Diff line number Diff line Loading @@ -219,9 +219,10 @@ extern void proc_sched_set_task(struct task_struct *p); #define TASK_WAKING 256 #define TASK_PARKED 512 #define TASK_NOLOAD 1024 #define TASK_STATE_MAX 2048 #define TASK_NEW 2048 #define TASK_STATE_MAX 4096 #define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWPN" #define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWPNn" extern char ___assert_task_state[1 - 2*!!( sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)]; Loading Loading @@ -2139,6 +2140,9 @@ static inline void put_task_struct(struct task_struct *t) __put_task_struct(t); } struct task_struct *task_rcu_dereference(struct task_struct **ptask); struct task_struct *try_get_task_struct(struct task_struct **ptask); #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN extern void task_cputime(struct task_struct *t, cputime_t *utime, cputime_t *stime); Loading
kernel/exit.c +76 −0 Original line number Diff line number Diff line Loading @@ -210,6 +210,82 @@ void release_task(struct task_struct *p) goto repeat; } /* * Note that if this function returns a valid task_struct pointer (!NULL) * task->usage must remain >0 for the duration of the RCU critical section. */ struct task_struct *task_rcu_dereference(struct task_struct **ptask) { struct sighand_struct *sighand; struct task_struct *task; /* * We need to verify that release_task() was not called and thus * delayed_put_task_struct() can't run and drop the last reference * before rcu_read_unlock(). We check task->sighand != NULL, * but we can read the already freed and reused memory. */ retry: task = rcu_dereference(*ptask); if (!task) return NULL; probe_kernel_address(&task->sighand, sighand); /* * Pairs with atomic_dec_and_test() in put_task_struct(). If this task * was already freed we can not miss the preceding update of this * pointer. */ smp_rmb(); if (unlikely(task != READ_ONCE(*ptask))) goto retry; /* * We've re-checked that "task == *ptask", now we have two different * cases: * * 1. This is actually the same task/task_struct. In this case * sighand != NULL tells us it is still alive. * * 2. This is another task which got the same memory for task_struct. * We can't know this of course, and we can not trust * sighand != NULL. * * In this case we actually return a random value, but this is * correct. * * If we return NULL - we can pretend that we actually noticed that * *ptask was updated when the previous task has exited. Or pretend * that probe_slab_address(&sighand) reads NULL. * * If we return the new task (because sighand is not NULL for any * reason) - this is fine too. This (new) task can't go away before * another gp pass. * * And note: We could even eliminate the false positive if re-read * task->sighand once again to avoid the falsely NULL. But this case * is very unlikely so we don't care. */ if (!sighand) return NULL; return task; } struct task_struct *try_get_task_struct(struct task_struct **ptask) { struct task_struct *task; rcu_read_lock(); task = task_rcu_dereference(ptask); if (task) get_task_struct(task); rcu_read_unlock(); return task; } /* * Determine if a process group is "orphaned", according to the POSIX * definition in 2.2.2.52. Orphaned process groups are not to be affected Loading
kernel/sched/core.c +81 −33 Original line number Diff line number Diff line Loading @@ -2342,11 +2342,11 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) __sched_fork(clone_flags, p); /* * We mark the process as running here. This guarantees that * We mark the process as NEW here. This guarantees that * nobody will actually run it, and a signal or other external * event cannot wake it up and insert it on the runqueue either. */ p->state = TASK_RUNNING; p->state = TASK_NEW; /* * Make sure we do not leak PI boosting priority to the child. Loading Loading @@ -2383,8 +2383,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) p->sched_class = &fair_sched_class; } if (p->sched_class->task_fork) p->sched_class->task_fork(p); init_entity_runnable_average(&p->se); /* * The child is not yet in the pid-hash so no cgroup attach races, Loading @@ -2394,7 +2393,13 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) * Silence PROVE_RCU. */ raw_spin_lock_irqsave(&p->pi_lock, flags); set_task_cpu(p, cpu); /* * We're setting the cpu for the first time, we don't migrate, * so use __set_task_cpu(). */ __set_task_cpu(p, cpu); if (p->sched_class->task_fork) p->sched_class->task_fork(p); raw_spin_unlock_irqrestore(&p->pi_lock, flags); #ifdef CONFIG_SCHED_INFO Loading Loading @@ -2526,16 +2531,18 @@ void wake_up_new_task(struct task_struct *p) struct rq_flags rf; struct rq *rq; /* Initialize new task's runnable average */ init_entity_runnable_average(&p->se); raw_spin_lock_irqsave(&p->pi_lock, rf.flags); p->state = TASK_RUNNING; #ifdef CONFIG_SMP /* * Fork balancing, do it here and not earlier because: * - cpus_allowed can change in the fork path * - any previously selected cpu might disappear through hotplug * * Use __set_task_cpu() to avoid calling sched_class::migrate_task_rq, * as we're not fully set-up yet. */ set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0)); __set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0)); #endif rq = __task_rq_lock(p, &rf); post_init_entity_util_avg(&p->se); Loading Loading @@ -3161,6 +3168,9 @@ static noinline void __schedule_bug(struct task_struct *prev) pr_cont("\n"); } #endif if (panic_on_warn) panic("scheduling while atomic\n"); dump_stack(); add_taint(TAINT_WARN, LOCKDEP_STILL_OK); } Loading Loading @@ -4752,7 +4762,8 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask) * @len: length in bytes of the bitmask pointed to by user_mask_ptr * @user_mask_ptr: user-space pointer to hold the current cpu mask * * Return: 0 on success. An error code otherwise. * Return: size of CPU mask copied to user_mask_ptr on success. An * error code otherwise. */ SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len, unsigned long __user *, user_mask_ptr) Loading Loading @@ -7231,7 +7242,6 @@ static void sched_rq_cpu_starting(unsigned int cpu) struct rq *rq = cpu_rq(cpu); rq->calc_load_update = calc_load_update; account_reset_rq(rq); update_max_interval(); } Loading Loading @@ -7711,6 +7721,8 @@ void sched_online_group(struct task_group *tg, struct task_group *parent) INIT_LIST_HEAD(&tg->children); list_add_rcu(&tg->siblings, &parent->children); spin_unlock_irqrestore(&task_group_lock, flags); online_fair_sched_group(tg); } /* rcu callback to free various structures associated with a task group */ Loading Loading @@ -7739,27 +7751,9 @@ void sched_offline_group(struct task_group *tg) spin_unlock_irqrestore(&task_group_lock, flags); } /* change task's runqueue when it moves between groups. * The caller of this function should have put the task in its new group * by now. This function just updates tsk->se.cfs_rq and tsk->se.parent to * reflect its new group. */ void sched_move_task(struct task_struct *tsk) static void sched_change_group(struct task_struct *tsk, int type) { struct task_group *tg; int queued, running; struct rq_flags rf; struct rq *rq; rq = task_rq_lock(tsk, &rf); running = task_current(rq, tsk); queued = task_on_rq_queued(tsk); if (queued) dequeue_task(rq, tsk, DEQUEUE_SAVE | DEQUEUE_MOVE); if (unlikely(running)) put_prev_task(rq, tsk); /* * All callers are synchronized by task_rq_lock(); we do not use RCU Loading @@ -7772,11 +7766,37 @@ void sched_move_task(struct task_struct *tsk) tsk->sched_task_group = tg; #ifdef CONFIG_FAIR_GROUP_SCHED if (tsk->sched_class->task_move_group) tsk->sched_class->task_move_group(tsk); if (tsk->sched_class->task_change_group) tsk->sched_class->task_change_group(tsk, type); else #endif set_task_rq(tsk, task_cpu(tsk)); } /* * Change task's runqueue when it moves between groups. * * The caller of this function should have put the task in its new group by * now. This function just updates tsk->se.cfs_rq and tsk->se.parent to reflect * its new group. */ void sched_move_task(struct task_struct *tsk) { int queued, running; struct rq_flags rf; struct rq *rq; rq = task_rq_lock(tsk, &rf); running = task_current(rq, tsk); queued = task_on_rq_queued(tsk); if (queued) dequeue_task(rq, tsk, DEQUEUE_SAVE | DEQUEUE_MOVE); if (unlikely(running)) put_prev_task(rq, tsk); sched_change_group(tsk, TASK_MOVE_GROUP); if (unlikely(running)) tsk->sched_class->set_curr_task(rq); Loading Loading @@ -8204,15 +8224,27 @@ static void cpu_cgroup_css_free(struct cgroup_subsys_state *css) sched_free_group(tg); } /* * This is called before wake_up_new_task(), therefore we really only * have to set its group bits, all the other stuff does not apply. */ static void cpu_cgroup_fork(struct task_struct *task) { sched_move_task(task); struct rq_flags rf; struct rq *rq; rq = task_rq_lock(task, &rf); sched_change_group(task, TASK_SET_GROUP); task_rq_unlock(rq, task, &rf); } static int cpu_cgroup_can_attach(struct cgroup_taskset *tset) { struct task_struct *task; struct cgroup_subsys_state *css; int ret = 0; cgroup_taskset_for_each(task, css, tset) { #ifdef CONFIG_RT_GROUP_SCHED Loading @@ -8223,8 +8255,24 @@ static int cpu_cgroup_can_attach(struct cgroup_taskset *tset) if (task->sched_class != &fair_sched_class) return -EINVAL; #endif /* * Serialize against wake_up_new_task() such that if its * running, we're sure to observe its full state. */ raw_spin_lock_irq(&task->pi_lock); /* * Avoid calling sched_move_task() before wake_up_new_task() * has happened. This would lead to problems with PELT, due to * move wanting to detach+attach while we're not attached yet. */ if (task->state == TASK_NEW) ret = -EINVAL; raw_spin_unlock_irq(&task->pi_lock); if (ret) break; } return 0; return ret; } static void cpu_cgroup_attach(struct cgroup_taskset *tset) Loading
kernel/sched/cpuacct.c +73 −41 Original line number Diff line number Diff line Loading @@ -25,15 +25,13 @@ enum cpuacct_stat_index { CPUACCT_STAT_NSTATS, }; enum cpuacct_usage_index { CPUACCT_USAGE_USER, /* ... user mode */ CPUACCT_USAGE_SYSTEM, /* ... kernel mode */ CPUACCT_USAGE_NRUSAGE, static const char * const cpuacct_stat_desc[] = { [CPUACCT_STAT_USER] = "user", [CPUACCT_STAT_SYSTEM] = "system", }; struct cpuacct_usage { u64 usages[CPUACCT_USAGE_NRUSAGE]; u64 usages[CPUACCT_STAT_NSTATS]; }; /* track cpu usage of a group of tasks and its child groups */ Loading Loading @@ -108,16 +106,16 @@ static void cpuacct_css_free(struct cgroup_subsys_state *css) } static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu, enum cpuacct_usage_index index) enum cpuacct_stat_index index) { struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); u64 data; /* * We allow index == CPUACCT_USAGE_NRUSAGE here to read * We allow index == CPUACCT_STAT_NSTATS here to read * the sum of suages. */ BUG_ON(index > CPUACCT_USAGE_NRUSAGE); BUG_ON(index > CPUACCT_STAT_NSTATS); #ifndef CONFIG_64BIT /* Loading @@ -126,11 +124,11 @@ static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu, raw_spin_lock_irq(&cpu_rq(cpu)->lock); #endif if (index == CPUACCT_USAGE_NRUSAGE) { if (index == CPUACCT_STAT_NSTATS) { int i = 0; data = 0; for (i = 0; i < CPUACCT_USAGE_NRUSAGE; i++) for (i = 0; i < CPUACCT_STAT_NSTATS; i++) data += cpuusage->usages[i]; } else { data = cpuusage->usages[index]; Loading @@ -155,7 +153,7 @@ static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val) raw_spin_lock_irq(&cpu_rq(cpu)->lock); #endif for (i = 0; i < CPUACCT_USAGE_NRUSAGE; i++) for (i = 0; i < CPUACCT_STAT_NSTATS; i++) cpuusage->usages[i] = val; #ifndef CONFIG_64BIT Loading @@ -165,7 +163,7 @@ static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val) /* return total cpu usage (in nanoseconds) of a group */ static u64 __cpuusage_read(struct cgroup_subsys_state *css, enum cpuacct_usage_index index) enum cpuacct_stat_index index) { struct cpuacct *ca = css_ca(css); u64 totalcpuusage = 0; Loading @@ -180,18 +178,18 @@ static u64 __cpuusage_read(struct cgroup_subsys_state *css, static u64 cpuusage_user_read(struct cgroup_subsys_state *css, struct cftype *cft) { return __cpuusage_read(css, CPUACCT_USAGE_USER); return __cpuusage_read(css, CPUACCT_STAT_USER); } static u64 cpuusage_sys_read(struct cgroup_subsys_state *css, struct cftype *cft) { return __cpuusage_read(css, CPUACCT_USAGE_SYSTEM); return __cpuusage_read(css, CPUACCT_STAT_SYSTEM); } static u64 cpuusage_read(struct cgroup_subsys_state *css, struct cftype *cft) { return __cpuusage_read(css, CPUACCT_USAGE_NRUSAGE); return __cpuusage_read(css, CPUACCT_STAT_NSTATS); } static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft, Loading @@ -213,7 +211,7 @@ static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft, } static int __cpuacct_percpu_seq_show(struct seq_file *m, enum cpuacct_usage_index index) enum cpuacct_stat_index index) { struct cpuacct *ca = css_ca(seq_css(m)); u64 percpu; Loading @@ -229,48 +227,78 @@ static int __cpuacct_percpu_seq_show(struct seq_file *m, static int cpuacct_percpu_user_seq_show(struct seq_file *m, void *V) { return __cpuacct_percpu_seq_show(m, CPUACCT_USAGE_USER); return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_USER); } static int cpuacct_percpu_sys_seq_show(struct seq_file *m, void *V) { return __cpuacct_percpu_seq_show(m, CPUACCT_USAGE_SYSTEM); return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_SYSTEM); } static int cpuacct_percpu_seq_show(struct seq_file *m, void *V) { return __cpuacct_percpu_seq_show(m, CPUACCT_USAGE_NRUSAGE); return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_NSTATS); } static const char * const cpuacct_stat_desc[] = { [CPUACCT_STAT_USER] = "user", [CPUACCT_STAT_SYSTEM] = "system", }; static int cpuacct_all_seq_show(struct seq_file *m, void *V) { struct cpuacct *ca = css_ca(seq_css(m)); int index; int cpu; seq_puts(m, "cpu"); for (index = 0; index < CPUACCT_STAT_NSTATS; index++) seq_printf(m, " %s", cpuacct_stat_desc[index]); seq_puts(m, "\n"); for_each_possible_cpu(cpu) { struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); seq_printf(m, "%d", cpu); for (index = 0; index < CPUACCT_STAT_NSTATS; index++) { #ifndef CONFIG_64BIT /* * Take rq->lock to make 64-bit read safe on 32-bit * platforms. */ raw_spin_lock_irq(&cpu_rq(cpu)->lock); #endif seq_printf(m, " %llu", cpuusage->usages[index]); #ifndef CONFIG_64BIT raw_spin_unlock_irq(&cpu_rq(cpu)->lock); #endif } seq_puts(m, "\n"); } return 0; } static int cpuacct_stats_show(struct seq_file *sf, void *v) { struct cpuacct *ca = css_ca(seq_css(sf)); s64 val[CPUACCT_STAT_NSTATS]; int cpu; s64 val = 0; int stat; memset(val, 0, sizeof(val)); for_each_possible_cpu(cpu) { struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu); val += kcpustat->cpustat[CPUTIME_USER]; val += kcpustat->cpustat[CPUTIME_NICE]; } val = cputime64_to_clock_t(val); seq_printf(sf, "%s %lld\n", cpuacct_stat_desc[CPUACCT_STAT_USER], val); u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat; val = 0; for_each_possible_cpu(cpu) { struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu); val += kcpustat->cpustat[CPUTIME_SYSTEM]; val += kcpustat->cpustat[CPUTIME_IRQ]; val += kcpustat->cpustat[CPUTIME_SOFTIRQ]; val[CPUACCT_STAT_USER] += cpustat[CPUTIME_USER]; val[CPUACCT_STAT_USER] += cpustat[CPUTIME_NICE]; val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SYSTEM]; val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_IRQ]; val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SOFTIRQ]; } val = cputime64_to_clock_t(val); seq_printf(sf, "%s %lld\n", cpuacct_stat_desc[CPUACCT_STAT_SYSTEM], val); for (stat = 0; stat < CPUACCT_STAT_NSTATS; stat++) { seq_printf(sf, "%s %lld\n", cpuacct_stat_desc[stat], cputime64_to_clock_t(val[stat])); } return 0; } Loading Loading @@ -301,6 +329,10 @@ static struct cftype files[] = { .name = "usage_percpu_sys", .seq_show = cpuacct_percpu_sys_seq_show, }, { .name = "usage_all", .seq_show = cpuacct_all_seq_show, }, { .name = "stat", .seq_show = cpuacct_stats_show, Loading @@ -316,11 +348,11 @@ static struct cftype files[] = { void cpuacct_charge(struct task_struct *tsk, u64 cputime) { struct cpuacct *ca; int index = CPUACCT_USAGE_SYSTEM; int index = CPUACCT_STAT_SYSTEM; struct pt_regs *regs = task_pt_regs(tsk); if (regs && user_mode(regs)) index = CPUACCT_USAGE_USER; index = CPUACCT_STAT_USER; rcu_read_lock(); Loading