Loading Documentation/scheduler/sched-hmp.txt +3 −1 Original line number Diff line number Diff line Loading @@ -1254,7 +1254,7 @@ idle CPUs which are not completely idle, increasing task packing behavior. Appears at: /proc/sys/kernel/sched_min_runtime Default value: 200000000 (200ms) Default value: 0 (0 ms) This tunable helps avoid frequent migration of task on account of energy-awareness. During scheduler tick, a check is made (in migration_needed()) Loading @@ -1267,6 +1267,8 @@ is used in migration_needed() to avoid "frequent" migrations. Once a task has been associated with a cpu (in either running or runnable state) for more than 'sched_min_runtime' ns, it is considered eligible for migration in tick path on account of energy awareness reasons. The same logic also applies to the load balancer path to avoid frequent migrations due to energy awareness. ========================= 8. HMP SCHEDULER TRACE POINTS Loading kernel/sched/core.c +3 −3 Original line number Diff line number Diff line Loading @@ -2593,7 +2593,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) trace_sched_migrate_task(p, new_cpu, pct_task_load(p)); note_run_start(p, -1); note_run_start(p, sched_clock()); if (task_cpu(p) != new_cpu) { struct task_migration_notifier tmn; Loading Loading @@ -3148,6 +3148,8 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) if (src_cpu != cpu) { wake_flags |= WF_MIGRATED; set_task_cpu(p, cpu); } else { note_run_start(p, wallclock); } #endif /* CONFIG_SMP */ Loading Loading @@ -4678,7 +4680,6 @@ need_resched: prev->state = TASK_RUNNING; } else { deactivate_task(rq, prev, DEQUEUE_SLEEP); note_run_start(prev, -1); prev->on_rq = 0; /* Loading Loading @@ -4709,7 +4710,6 @@ need_resched: update_task_ravg(next, rq, PICK_NEXT_TASK, wallclock, 0); clear_tsk_need_resched(prev); rq->skip_clock_update = 0; note_run_start(next, wallclock); BUG_ON(task_cpu(next) != cpu_of(rq)); Loading kernel/sched/fair.c +30 −7 Original line number 
Diff line number Diff line Loading @@ -1229,7 +1229,12 @@ unsigned int __read_mostly sched_init_task_load_pelt; unsigned int __read_mostly sched_init_task_load_windows; unsigned int __read_mostly sysctl_sched_init_task_load_pct = 15; unsigned int __read_mostly sysctl_sched_min_runtime = 200000000; /* 200 ms */ /* * Keep these two below in sync. One is in unit of ns and the * other in unit of us. */ unsigned int __read_mostly sysctl_sched_min_runtime = 0; /* 0 ms */ u64 __read_mostly sched_min_runtime = 0; /* 0 ms */ static inline unsigned int task_load(struct task_struct *p) { Loading Loading @@ -2197,6 +2202,10 @@ int sched_hmp_proc_update_handler(struct ctl_table *table, int write, if (ret || !write || !sched_enable_hmp) return ret; if (data == &sysctl_sched_min_runtime) { sched_min_runtime = ((u64) sysctl_sched_min_runtime) * 1000; return 0; } if ((sysctl_sched_downmigrate_pct > sysctl_sched_upmigrate_pct) || *data > 100) { *data = old_val; Loading Loading @@ -2295,10 +2304,6 @@ static int lower_power_cpu_available(struct task_struct *p, int cpu) int i; int lowest_power_cpu = task_cpu(p); int lowest_power = power_cost(p, task_cpu(p)); u64 delta = sched_clock() - p->run_start; if (delta < sysctl_sched_min_runtime) return 0; /* Is a lower-powered idle CPU available which will fit this task? 
*/ for_each_cpu_and(i, tsk_cpus_allowed(p), cpu_online_mask) { Loading @@ -2315,6 +2320,7 @@ static int lower_power_cpu_available(struct task_struct *p, int cpu) } static inline int is_cpu_throttling_imminent(int cpu); static inline int is_task_migration_throttled(struct task_struct *p); /* * Check if a task is on the "wrong" cpu (i.e its current cpu is not the ideal Loading Loading @@ -2348,6 +2354,7 @@ static inline int migration_needed(struct rq *rq, struct task_struct *p) return MOVE_TO_BIG_CPU; if (sched_enable_power_aware && !is_task_migration_throttled(p) && is_cpu_throttling_imminent(cpu_of(rq)) && lower_power_cpu_available(p, cpu_of(rq))) return MOVE_TO_POWER_EFFICIENT_CPU; Loading Loading @@ -2422,6 +2429,13 @@ static inline int is_cpu_throttling_imminent(int cpu) return throttling; } static inline int is_task_migration_throttled(struct task_struct *p) { u64 delta = sched_clock() - p->run_start; return delta < sched_min_runtime; } unsigned int cpu_temp(int cpu) { struct cpu_pwr_stats *per_cpu_info = get_cpu_pwr_stats(); Loading Loading @@ -2493,6 +2507,11 @@ static inline int is_cpu_throttling_imminent(int cpu) return 0; } static inline int is_task_migration_throttled(struct task_struct *p) { return 0; } unsigned int cpu_temp(int cpu) { return 0; Loading Loading @@ -6098,6 +6117,8 @@ static bool update_sd_pick_busiest(struct lb_env *env, struct sched_group *sg, struct sg_lb_stats *sgs) { int cpu; if (sched_boost() && !sds->busiest && sgs->sum_nr_running && (env->idle != CPU_NOT_IDLE) && (capacity(env->dst_rq) > group_rq_capacity(sg))) { Loading @@ -6122,11 +6143,13 @@ static bool update_sd_pick_busiest(struct lb_env *env, * seen a busy group yet and we are close to throttling. We want to * prioritize spreading work over power optimization. 
*/ cpu = cpumask_first(sched_group_cpus(sg)); if (!sds->busiest && (capacity(env->dst_rq) == group_rq_capacity(sg)) && sgs->sum_nr_running && (env->idle != CPU_NOT_IDLE) && power_cost_at_freq(env->dst_cpu, 0) < power_cost_at_freq(cpumask_first(sched_group_cpus(sg)), 0) && is_cpu_throttling_imminent(cpumask_first(sched_group_cpus(sg)))) { power_cost_at_freq(cpu, 0) && !is_task_migration_throttled(cpu_rq(cpu)->curr) && is_cpu_throttling_imminent(cpu)) { env->flags |= LBF_PWR_ACTIVE_BALANCE; return true; } Loading kernel/sysctl.c +1 −1 Original line number Diff line number Diff line Loading @@ -379,7 +379,7 @@ static struct ctl_table kern_table[] = { .data = &sysctl_sched_min_runtime, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec, .proc_handler = sched_hmp_proc_update_handler, }, { .procname = "sched_spill_load", Loading Loading
Documentation/scheduler/sched-hmp.txt +3 −1 Original line number Diff line number Diff line Loading @@ -1254,7 +1254,7 @@ idle CPUs which are not completely idle, increasing task packing behavior. Appears at: /proc/sys/kernel/sched_min_runtime Default value: 200000000 (200ms) Default value: 0 (0 ms) This tunable helps avoid frequent migration of task on account of energy-awareness. During scheduler tick, a check is made (in migration_needed()) Loading @@ -1267,6 +1267,8 @@ is used in migration_needed() to avoid "frequent" migrations. Once a task has been associated with a cpu (in either running or runnable state) for more than 'sched_min_runtime' ns, it is considered eligible for migration in tick path on account of energy awareness reasons. The same logic also applies to the load balancer path to avoid frequent migrations due to energy awareness. ========================= 8. HMP SCHEDULER TRACE POINTS Loading
kernel/sched/core.c +3 −3 Original line number Diff line number Diff line Loading @@ -2593,7 +2593,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) trace_sched_migrate_task(p, new_cpu, pct_task_load(p)); note_run_start(p, -1); note_run_start(p, sched_clock()); if (task_cpu(p) != new_cpu) { struct task_migration_notifier tmn; Loading Loading @@ -3148,6 +3148,8 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) if (src_cpu != cpu) { wake_flags |= WF_MIGRATED; set_task_cpu(p, cpu); } else { note_run_start(p, wallclock); } #endif /* CONFIG_SMP */ Loading Loading @@ -4678,7 +4680,6 @@ need_resched: prev->state = TASK_RUNNING; } else { deactivate_task(rq, prev, DEQUEUE_SLEEP); note_run_start(prev, -1); prev->on_rq = 0; /* Loading Loading @@ -4709,7 +4710,6 @@ need_resched: update_task_ravg(next, rq, PICK_NEXT_TASK, wallclock, 0); clear_tsk_need_resched(prev); rq->skip_clock_update = 0; note_run_start(next, wallclock); BUG_ON(task_cpu(next) != cpu_of(rq)); Loading
kernel/sched/fair.c +30 −7 Original line number Diff line number Diff line Loading @@ -1229,7 +1229,12 @@ unsigned int __read_mostly sched_init_task_load_pelt; unsigned int __read_mostly sched_init_task_load_windows; unsigned int __read_mostly sysctl_sched_init_task_load_pct = 15; unsigned int __read_mostly sysctl_sched_min_runtime = 200000000; /* 200 ms */ /* * Keep these two below in sync. One is in unit of ns and the * other in unit of us. */ unsigned int __read_mostly sysctl_sched_min_runtime = 0; /* 0 ms */ u64 __read_mostly sched_min_runtime = 0; /* 0 ms */ static inline unsigned int task_load(struct task_struct *p) { Loading Loading @@ -2197,6 +2202,10 @@ int sched_hmp_proc_update_handler(struct ctl_table *table, int write, if (ret || !write || !sched_enable_hmp) return ret; if (data == &sysctl_sched_min_runtime) { sched_min_runtime = ((u64) sysctl_sched_min_runtime) * 1000; return 0; } if ((sysctl_sched_downmigrate_pct > sysctl_sched_upmigrate_pct) || *data > 100) { *data = old_val; Loading Loading @@ -2295,10 +2304,6 @@ static int lower_power_cpu_available(struct task_struct *p, int cpu) int i; int lowest_power_cpu = task_cpu(p); int lowest_power = power_cost(p, task_cpu(p)); u64 delta = sched_clock() - p->run_start; if (delta < sysctl_sched_min_runtime) return 0; /* Is a lower-powered idle CPU available which will fit this task? 
*/ for_each_cpu_and(i, tsk_cpus_allowed(p), cpu_online_mask) { Loading @@ -2315,6 +2320,7 @@ static int lower_power_cpu_available(struct task_struct *p, int cpu) } static inline int is_cpu_throttling_imminent(int cpu); static inline int is_task_migration_throttled(struct task_struct *p); /* * Check if a task is on the "wrong" cpu (i.e its current cpu is not the ideal Loading Loading @@ -2348,6 +2354,7 @@ static inline int migration_needed(struct rq *rq, struct task_struct *p) return MOVE_TO_BIG_CPU; if (sched_enable_power_aware && !is_task_migration_throttled(p) && is_cpu_throttling_imminent(cpu_of(rq)) && lower_power_cpu_available(p, cpu_of(rq))) return MOVE_TO_POWER_EFFICIENT_CPU; Loading Loading @@ -2422,6 +2429,13 @@ static inline int is_cpu_throttling_imminent(int cpu) return throttling; } static inline int is_task_migration_throttled(struct task_struct *p) { u64 delta = sched_clock() - p->run_start; return delta < sched_min_runtime; } unsigned int cpu_temp(int cpu) { struct cpu_pwr_stats *per_cpu_info = get_cpu_pwr_stats(); Loading Loading @@ -2493,6 +2507,11 @@ static inline int is_cpu_throttling_imminent(int cpu) return 0; } static inline int is_task_migration_throttled(struct task_struct *p) { return 0; } unsigned int cpu_temp(int cpu) { return 0; Loading Loading @@ -6098,6 +6117,8 @@ static bool update_sd_pick_busiest(struct lb_env *env, struct sched_group *sg, struct sg_lb_stats *sgs) { int cpu; if (sched_boost() && !sds->busiest && sgs->sum_nr_running && (env->idle != CPU_NOT_IDLE) && (capacity(env->dst_rq) > group_rq_capacity(sg))) { Loading @@ -6122,11 +6143,13 @@ static bool update_sd_pick_busiest(struct lb_env *env, * seen a busy group yet and we are close to throttling. We want to * prioritize spreading work over power optimization. 
*/ cpu = cpumask_first(sched_group_cpus(sg)); if (!sds->busiest && (capacity(env->dst_rq) == group_rq_capacity(sg)) && sgs->sum_nr_running && (env->idle != CPU_NOT_IDLE) && power_cost_at_freq(env->dst_cpu, 0) < power_cost_at_freq(cpumask_first(sched_group_cpus(sg)), 0) && is_cpu_throttling_imminent(cpumask_first(sched_group_cpus(sg)))) { power_cost_at_freq(cpu, 0) && !is_task_migration_throttled(cpu_rq(cpu)->curr) && is_cpu_throttling_imminent(cpu)) { env->flags |= LBF_PWR_ACTIVE_BALANCE; return true; } Loading
kernel/sysctl.c +1 −1 Original line number Diff line number Diff line Loading @@ -379,7 +379,7 @@ static struct ctl_table kern_table[] = { .data = &sysctl_sched_min_runtime, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec, .proc_handler = sched_hmp_proc_update_handler, }, { .procname = "sched_spill_load", Loading