Loading kernel/sched/core.c +9 −1 Original line number Diff line number Diff line Loading @@ -1234,6 +1234,9 @@ unsigned int min_capacity = 1024; /* min(rq->capacity) */ unsigned int max_load_scale_factor = 1024; /* max possible load scale factor */ unsigned int max_possible_capacity = 1024; /* max(rq->max_possible_capacity) */ /* Mask of all CPUs that have max_possible_capacity */ cpumask_t mpc_mask = CPU_MASK_ALL; /* Window size (in ns) */ __read_mostly unsigned int sched_ravg_window = 10000000; Loading Loading @@ -2571,8 +2574,13 @@ static int cpufreq_notifier_policy(struct notifier_block *nb, mplsf = div_u64(((u64) rq->load_scale_factor) * rq->max_possible_freq, rq->max_freq); if (mpc > highest_mpc) if (mpc > highest_mpc) { highest_mpc = mpc; cpumask_clear(&mpc_mask); cpumask_set_cpu(i, &mpc_mask); } else if (mpc == highest_mpc) { cpumask_set_cpu(i, &mpc_mask); } if (mplsf > highest_mplsf) highest_mplsf = mplsf; Loading kernel/sched/fair.c +204 −161 Original line number Diff line number Diff line Loading @@ -2709,21 +2709,6 @@ int sched_get_cpu_mostly_idle_nr_run(int cpu) return rq->mostly_idle_nr_run; } /* * 'load' is in reference to "best cpu" at its best frequency. * Scale that in reference to a given cpu, accounting for how bad it is * in reference to "best cpu". */ u64 scale_load_to_cpu(u64 task_load, int cpu) { struct rq *rq = cpu_rq(cpu); task_load *= (u64)rq->load_scale_factor; task_load /= 1024; return task_load; } #ifdef CONFIG_CGROUP_SCHED static inline int upmigrate_discouraged(struct task_struct *p) Loading Loading @@ -2793,11 +2778,9 @@ static inline u64 cpu_load_sync(int cpu, int sync) } static int spill_threshold_crossed(struct task_struct *p, struct rq *rq, int cpu, int sync) spill_threshold_crossed(u64 task_load, u64 cpu_load, struct rq *rq) { u64 total_load = cpu_load_sync(cpu, sync) + scale_load_to_cpu(task_load(p), cpu); u64 total_load = task_load + cpu_load; if (total_load > sched_spill_load || (rq->nr_running + 1) > sysctl_sched_spill_nr_run) Loading @@ -2815,10 +2798,9 @@ int mostly_idle_cpu(int cpu) && !sched_cpu_high_irqload(cpu); } static int mostly_idle_cpu_sync(int cpu, int sync) static int mostly_idle_cpu_sync(int cpu, u64 load, int sync) { struct rq *rq = cpu_rq(cpu); u64 load = cpu_load_sync(cpu, sync); int nr_running; nr_running = rq->nr_running; Loading Loading @@ -2916,14 +2898,12 @@ done: * sched_downmigrate. This will help avoid frequenty migrations for * tasks with load close to the upmigrate threshold */ static int task_will_fit(struct task_struct *p, int cpu) static int task_load_will_fit(struct task_struct *p, u64 task_load, int cpu) { u64 load; int prev_cpu = task_cpu(p); struct rq *prev_rq = cpu_rq(prev_cpu); struct rq *prev_rq = cpu_rq(task_cpu(p)); struct rq *rq = cpu_rq(cpu); int upmigrate = sched_upmigrate; int nice = task_nice(p); int upmigrate, nice; if (rq->capacity == max_capacity) return 1; Loading @@ -2932,33 +2912,40 @@ static int task_will_fit(struct task_struct *p, int cpu) if (rq->capacity > prev_rq->capacity) return 1; } else { nice = task_nice(p); if (nice > sched_upmigrate_min_nice || upmigrate_discouraged(p)) return 1; load = scale_load_to_cpu(task_load(p), cpu); upmigrate = sched_upmigrate; if (prev_rq->capacity > rq->capacity) upmigrate = sched_downmigrate; if (load < upmigrate) if (task_load < upmigrate) return 1; } return 0; } static int eligible_cpu(struct task_struct *p, int cpu, int sync) static int task_will_fit(struct task_struct *p, int cpu) { u64 tload = scale_load_to_cpu(task_load(p), cpu); return task_load_will_fit(p, tload, cpu); } static int eligible_cpu(u64 task_load, u64 cpu_load, int cpu, int sync) { struct rq *rq = cpu_rq(cpu); if (sched_cpu_high_irqload(cpu)) return 0; if (mostly_idle_cpu_sync(cpu, sync)) if (mostly_idle_cpu_sync(cpu, cpu_load, sync)) return 1; if (rq->max_possible_capacity != max_possible_capacity) return !spill_threshold_crossed(p, rq, cpu, sync); return !spill_threshold_crossed(task_load, cpu_load, rq); return 0; } Loading Loading @@ -3013,22 +3000,23 @@ unsigned int power_cost_at_freq(int cpu, unsigned int freq) /* Return the cost of running task p on CPU cpu. This function * currently assumes that task p is the only task which will run on * the CPU. */ static unsigned int power_cost(struct task_struct *p, int cpu) static unsigned int power_cost(u64 task_load, int cpu) { unsigned int task_freq, cur_freq; struct rq *rq = cpu_rq(cpu); u64 demand; unsigned int task_freq; unsigned int cur_freq = cpu_rq(cpu)->cur_freq; if (!sysctl_sched_enable_power_aware) return cpu_rq(cpu)->max_possible_capacity; return rq->max_possible_capacity; /* calculate % of max freq needed */ demand = scale_load_to_cpu(task_load(p), cpu) * 100; demand = task_load * 100; demand = div64_u64(demand, max_task_load()); task_freq = demand * cpu_rq(cpu)->max_possible_freq; task_freq = demand * rq->max_possible_freq; task_freq /= 100; /* khz needed */ cur_freq = rq->cur_freq; task_freq = max(cur_freq, task_freq); return power_cost_at_freq(cpu, task_freq); Loading @@ -3036,102 +3024,103 @@ static unsigned int power_cost(struct task_struct *p, int cpu) static int best_small_task_cpu(struct task_struct *p, int sync) { int best_busy_cpu = -1, best_fallback_cpu = -1; int best_mi_cpu = -1; int min_cost_cpu = -1, min_cstate_cpu = -1; int best_busy_cpu = -1, fallback_cpu = -1; int min_cstate_cpu = -1; int min_cstate = INT_MAX; int min_fallback_cpu_cost = INT_MAX; int min_cost = INT_MAX; int i, cstate, cpu_cost; u64 load, min_busy_load = ULLONG_MAX; int cost_list[nr_cpu_ids]; int prev_cpu = task_cpu(p); struct cpumask search_cpus; int cpu_cost, min_cost = INT_MAX; int i = task_cpu(p), prev_cpu; int hmp_capable; u64 tload, cpu_load, min_load = ULLONG_MAX; cpumask_t temp; cpumask_t search_cpu; cpumask_t fb_search_cpu = CPU_MASK_NONE; struct rq *rq; cpumask_and(&search_cpus, tsk_cpus_allowed(p), cpu_online_mask); cpumask_and(&temp, &mpc_mask, cpu_possible_mask); hmp_capable = !cpumask_full(&temp); if (cpumask_empty(&search_cpus)) return prev_cpu; cpumask_and(&search_cpu, tsk_cpus_allowed(p), cpu_online_mask); if (unlikely(!cpumask_test_cpu(i, &search_cpu))) i = cpumask_first(&search_cpu); /* Take a first pass to find the lowest power cost CPU. This will avoid a potential O(n^2) search */ for_each_cpu(i, &search_cpus) { do { rq = cpu_rq(i); trace_sched_cpu_load(cpu_rq(i), idle_cpu(i), mostly_idle_cpu_sync(i, sync), sched_irqload(i), power_cost(p, i), cpu_temp(i)); cpumask_clear_cpu(i, &search_cpu); cpu_cost = power_cost(p, i); if (cpu_cost < min_cost || (cpu_cost == min_cost && i == prev_cpu)) { min_cost = cpu_cost; min_cost_cpu = i; } trace_sched_cpu_load(rq, idle_cpu(i), mostly_idle_cpu_sync(i, cpu_load_sync(i, sync), sync), sched_irqload(i), power_cost(scale_load_to_cpu(task_load(p), i), i), cpu_temp(i)); cost_list[i] = cpu_cost; if (rq->max_possible_capacity == max_possible_capacity && hmp_capable) { cpumask_and(&fb_search_cpu, &search_cpu, &rq->freq_domain_cpumask); cpumask_andnot(&search_cpu, &search_cpu, &rq->freq_domain_cpumask); continue; } /* * Optimization to steer task towards the minimum power cost * CPU if it's the task's previous CPU. The tradeoff is that * we may have to check the same information again in pass 2. */ if (!cpu_rq(min_cost_cpu)->cstate && mostly_idle_cpu_sync(min_cost_cpu, sync) && !sched_cpu_high_irqload(min_cost_cpu) && min_cost_cpu == prev_cpu) return min_cost_cpu; for_each_cpu(i, &search_cpus) { struct rq *rq = cpu_rq(i); cstate = rq->cstate; if (power_delta_exceeded(cost_list[i], min_cost)) { if (cost_list[i] < min_fallback_cpu_cost || (cost_list[i] == min_fallback_cpu_cost && i == prev_cpu)) { best_fallback_cpu = i; min_fallback_cpu_cost = cost_list[i]; } if (sched_cpu_high_irqload(i)) continue; } if (idle_cpu(i) && cstate && !sched_cpu_high_irqload(i)) { if (cstate < min_cstate || (cstate == min_cstate && i == prev_cpu)) { if (idle_cpu(i) && rq->cstate) { if (rq->cstate < min_cstate) { min_cstate_cpu = i; min_cstate = cstate; min_cstate = rq->cstate; } continue; } if (mostly_idle_cpu_sync(i, sync) && !sched_cpu_high_irqload(i)) { if (best_mi_cpu == -1 || i == prev_cpu) best_mi_cpu = i; cpu_load = cpu_load_sync(i, sync); if (mostly_idle_cpu_sync(i, cpu_load, sync)) return i; } while ((i = cpumask_first(&search_cpu)) < nr_cpu_ids); if (min_cstate_cpu != -1) return min_cstate_cpu; cpumask_and(&search_cpu, tsk_cpus_allowed(p), cpu_online_mask); cpumask_andnot(&search_cpu, &search_cpu, &fb_search_cpu); for_each_cpu(i, &search_cpu) { rq = cpu_rq(i); prev_cpu = (i == task_cpu(p)); if (sched_cpu_high_irqload(i)) continue; } load = cpu_load_sync(i, sync); if (!spill_threshold_crossed(p, rq, i, sync)) { if (load < min_busy_load || (load == min_busy_load && i == prev_cpu)) { min_busy_load = load; tload = scale_load_to_cpu(task_load(p), i); cpu_load = cpu_load_sync(i, sync); if (!spill_threshold_crossed(tload, cpu_load, rq)) { if (cpu_load < min_load || (prev_cpu && cpu_load == min_load)) { min_load = cpu_load; best_busy_cpu = i; } } } if (best_mi_cpu != -1) return best_mi_cpu; if (min_cstate_cpu != -1) return min_cstate_cpu; if (best_busy_cpu != -1) return best_busy_cpu; return best_fallback_cpu; for_each_cpu(i, &fb_search_cpu) { rq = cpu_rq(i); prev_cpu = (i == task_cpu(p)); tload = scale_load_to_cpu(task_load(p), i); cpu_cost = power_cost(tload, i); if (cpu_cost < min_cost || (prev_cpu && cpu_cost == min_cost)) { fallback_cpu = i; min_cost = cpu_cost; } } return fallback_cpu; } #define UP_MIGRATION 1 Loading @@ -3139,38 +3128,58 @@ static int best_small_task_cpu(struct task_struct *p, int sync) #define EA_MIGRATION 3 #define IRQLOAD_MIGRATION 4 static int skip_cpu(struct task_struct *p, int cpu, int reason) static int skip_freq_domain(struct rq *task_rq, struct rq *rq, int reason) { struct rq *rq = cpu_rq(cpu); struct rq *task_rq = task_rq(p); int skip = 0; int skip; if (!reason) return 0; if (is_reserved(cpu)) return 1; switch (reason) { case UP_MIGRATION: skip = (rq->capacity <= task_rq->capacity); skip = rq->capacity <= task_rq->capacity; break; case DOWN_MIGRATION: skip = (rq->capacity >= task_rq->capacity); skip = rq->capacity >= task_rq->capacity; break; case EA_MIGRATION: skip = rq->capacity < task_rq->capacity || power_cost(p, cpu) > power_cost(p, task_cpu(p)); skip = rq->capacity != task_rq->capacity; break; case IRQLOAD_MIGRATION: /* Purposely fall through */ default: skip = (cpu == task_cpu(p)); return 0; } return skip; } static int skip_cpu(struct rq *task_rq, struct rq *rq, int cpu, u64 task_load, int reason) { int skip; if (!reason) return 0; if (is_reserved(cpu)) return 1; switch (reason) { case EA_MIGRATION: skip = power_cost(task_load, cpu) > power_cost(task_load, cpu_of(task_rq)); break; case IRQLOAD_MIGRATION: /* Purposely fall through */ default: skip = (rq == task_rq); } return skip; Loading Loading @@ -3200,7 +3209,7 @@ static int select_packing_target(struct task_struct *p, int best_cpu) /* Pick the first lowest power cpu as target */ for_each_cpu(i, &search_cpus) { int cost = power_cost(p, i); int cost = power_cost(scale_load_to_cpu(task_load(p), i), i); if (cost < min_cost && !sched_cpu_high_irqload(i)) { target = i; Loading Loading @@ -3230,17 +3239,19 @@ static inline int wake_to_idle(struct task_struct *p) static int select_best_cpu(struct task_struct *p, int target, int reason, int sync) { int i, best_cpu = -1, fallback_idle_cpu = -1, min_cstate_cpu = -1; int prev_cpu = task_cpu(p); int i, j, prev_cpu, best_cpu = -1; int fallback_idle_cpu = -1, min_cstate_cpu = -1; int cpu_cost, min_cost = INT_MAX; int min_idle_cost = INT_MAX, min_busy_cost = INT_MAX; u64 load, min_load = ULLONG_MAX, min_fallback_load = ULLONG_MAX; u64 tload, cpu_load; u64 min_load = ULLONG_MAX, min_fallback_load = ULLONG_MAX; int small_task = is_small_task(p); int boost = sched_boost(); int cstate, min_cstate = INT_MAX; int prefer_idle = -1; int curr_cpu = smp_processor_id(); int prefer_idle_override = 0; cpumask_t search_cpus; struct rq *trq; if (reason) { prefer_idle = 1; Loading @@ -3264,33 +3275,52 @@ static int select_best_cpu(struct task_struct *p, int target, int reason, goto done; } /* Todo : Optimize this loop */ for_each_cpu_and(i, tsk_cpus_allowed(p), cpu_online_mask) { trq = task_rq(p); cpumask_and(&search_cpus, tsk_cpus_allowed(p), cpu_online_mask); for_each_cpu(i, &search_cpus) { struct rq *rq = cpu_rq(i); trace_sched_cpu_load(cpu_rq(i), idle_cpu(i), mostly_idle_cpu_sync(i, sync), sched_irqload(i), power_cost(p, i), mostly_idle_cpu_sync(i, cpu_load_sync(i, sync), sync), sched_irqload(i), power_cost(scale_load_to_cpu(task_load(p), i), i), cpu_temp(i)); if (skip_cpu(p, i, reason)) if (skip_freq_domain(trq, rq, reason)) { cpumask_andnot(&search_cpus, &search_cpus, &rq->freq_domain_cpumask); continue; } tload = scale_load_to_cpu(task_load(p), i); if (skip_cpu(trq, rq, i, tload, reason)) continue; prev_cpu = (i == task_cpu(p)); /* * The least-loaded mostly-idle CPU where the task * won't fit is our fallback if we can't find a CPU * where the task will fit. */ if (!task_will_fit(p, i)) { if (mostly_idle_cpu_sync(i, sync) && !sched_cpu_high_irqload(i)) { load = cpu_load_sync(i, sync); if (load < min_fallback_load || (load == min_fallback_load && i == prev_cpu)) { min_fallback_load = load; fallback_idle_cpu = i; if (!task_load_will_fit(p, tload, i)) { for_each_cpu_and(j, &search_cpus, &rq->freq_domain_cpumask) { cpu_load = cpu_load_sync(j, sync); if (mostly_idle_cpu_sync(j, cpu_load, sync) && !sched_cpu_high_irqload(j)) { if (cpu_load < min_fallback_load || (cpu_load == min_fallback_load && j == task_cpu(p))) { min_fallback_load = cpu_load; fallback_idle_cpu = j; } } } cpumask_andnot(&search_cpus, &search_cpus, &rq->freq_domain_cpumask); continue; } Loading @@ -3298,7 +3328,8 @@ static int select_best_cpu(struct task_struct *p, int target, int reason, if (prefer_idle == -1) prefer_idle = cpu_rq(i)->prefer_idle; if (!eligible_cpu(p, i, sync)) cpu_load = cpu_load_sync(i, sync); if (!eligible_cpu(tload, cpu_load, i, sync)) continue; /* Loading @@ -3307,9 +3338,7 @@ static int select_best_cpu(struct task_struct *p, int target, int reason, * spill. */ load = cpu_load_sync(i, sync); cpu_cost = power_cost(p, i); cstate = cpu_rq(i)->cstate; cpu_cost = power_cost(tload, i); /* * If the task fits in a CPU in a lower power band, that Loading Loading @@ -3337,8 +3366,10 @@ static int select_best_cpu(struct task_struct *p, int target, int reason, * prefer_idle is set. Otherwise if prefer_idle is unset sync * wakeups will get biased away from the waker CPU. */ if (idle_cpu(i) || (sync && i == curr_cpu && prefer_idle && cpu_rq(i)->nr_running == 1)) { if (idle_cpu(i) || (sync && i == smp_processor_id() && prefer_idle && cpu_rq(i)->nr_running == 1)) { cstate = cpu_rq(i)->cstate; if (cstate > min_cstate) continue; Loading @@ -3350,7 +3381,7 @@ static int select_best_cpu(struct task_struct *p, int target, int reason, } if (cpu_cost < min_idle_cost || (cpu_cost == min_idle_cost && i == prev_cpu)) { (prev_cpu && cpu_cost == min_idle_cost)) { min_idle_cost = cpu_cost; min_cstate_cpu = i; } Loading @@ -3362,11 +3393,11 @@ static int select_best_cpu(struct task_struct *p, int target, int reason, * For CPUs that are not completely idle, pick one with the * lowest load and break ties with power cost */ if (load > min_load) if (cpu_load > min_load) continue; if (load < min_load) { min_load = load; if (cpu_load < min_load) { min_load = cpu_load; min_busy_cost = cpu_cost; best_cpu = i; continue; Loading @@ -3378,7 +3409,7 @@ static int select_best_cpu(struct task_struct *p, int target, int reason, * more power efficient CPU option. */ if (cpu_cost < min_busy_cost || (cpu_cost == min_busy_cost && i == prev_cpu)) { (prev_cpu && cpu_cost == min_busy_cost)) { min_busy_cost = cpu_cost; best_cpu = i; } Loading @@ -3389,7 +3420,7 @@ static int select_best_cpu(struct task_struct *p, int target, int reason, * best_cpu cannot have high irq load. */ if (min_cstate_cpu >= 0 && (prefer_idle > 0 || best_cpu < 0 || !mostly_idle_cpu_sync(best_cpu, sync))) !mostly_idle_cpu_sync(best_cpu, min_load, sync))) best_cpu = min_cstate_cpu; done: if (best_cpu < 0) { Loading @@ -3399,7 +3430,7 @@ done: * prev_cpu. We may just benefit from having * a hot cache. */ best_cpu = prev_cpu; best_cpu = task_cpu(p); else best_cpu = fallback_idle_cpu; } Loading Loading @@ -3900,15 +3931,27 @@ static int lower_power_cpu_available(struct task_struct *p, int cpu) { int i; int lowest_power_cpu = task_cpu(p); int lowest_power = power_cost(p, task_cpu(p)); int lowest_power = power_cost(scale_load_to_cpu(task_load(p), lowest_power_cpu), lowest_power_cpu); struct cpumask search_cpus; struct rq *rq = cpu_rq(cpu); /* * This function should be called only when task 'p' fits in the current * CPU which can be ensured by task_will_fit() prior to this. */ cpumask_and(&search_cpus, tsk_cpus_allowed(p), cpu_online_mask); cpumask_and(&search_cpus, &search_cpus, &rq->freq_domain_cpumask); cpumask_clear_cpu(lowest_power_cpu, &search_cpus); /* Is a lower-powered idle CPU available which will fit this task? */ for_each_cpu_and(i, tsk_cpus_allowed(p), cpu_online_mask) { if (idle_cpu(i) && task_will_fit(p, i)) { int idle_power_cost = power_cost(p, i); if (idle_power_cost < lowest_power) { for_each_cpu(i, &search_cpus) { if (idle_cpu(i)) { int cost = power_cost(scale_load_to_cpu(task_load(p), i), i); if (cost < lowest_power) { lowest_power_cpu = i; lowest_power = idle_power_cost; lowest_power = cost; } } } Loading Loading @@ -4072,13 +4115,13 @@ static inline int find_new_hmp_ilb(int type) return 0; } static inline int power_cost(struct task_struct *p, int cpu) static inline int power_cost(u64 task_load, int cpu) { return SCHED_CAPACITY_SCALE; } static inline int spill_threshold_crossed(struct task_struct *p, struct rq *rq, int cpu, int sync) spill_threshold_crossed(u64 task_load, u64 cpu_load, struct rq *rq) { return 0; } Loading kernel/sched/sched.h +16 −1 Original line number Diff line number Diff line Loading @@ -901,6 +901,7 @@ extern unsigned int max_capacity; extern unsigned int min_capacity; extern unsigned int max_load_scale_factor; extern unsigned int max_possible_capacity; extern cpumask_t mpc_mask; extern unsigned long capacity_scale_cpu_efficiency(int cpu); extern unsigned long capacity_scale_cpu_freq(int cpu); extern unsigned int sched_mostly_idle_load; Loading @@ -909,7 +910,6 @@ extern unsigned int sched_upmigrate; extern unsigned int sched_downmigrate; extern unsigned int sched_init_task_load_pelt; extern unsigned int sched_init_task_load_windows; extern u64 scale_load_to_cpu(u64 load, int cpu); extern unsigned int sched_heavy_task; extern unsigned int up_down_migrate_scale_factor; extern void reset_cpu_hmp_stats(int cpu, int reset_cra); Loading @@ -921,6 +921,21 @@ unsigned int cpu_temp(int cpu); extern unsigned int nr_eligible_big_tasks(int cpu); extern void update_up_down_migrate(void); /* * 'load' is in reference to "best cpu" at its best frequency. * Scale that in reference to a given cpu, accounting for how bad it is * in reference to "best cpu". */ static inline u64 scale_load_to_cpu(u64 task_load, int cpu) { struct rq *rq = cpu_rq(cpu); task_load *= (u64)rq->load_scale_factor; task_load /= 1024; return task_load; } static inline int capacity(struct rq *rq) { return rq->capacity; Loading Loading
kernel/sched/core.c +9 −1 Original line number Diff line number Diff line Loading @@ -1234,6 +1234,9 @@ unsigned int min_capacity = 1024; /* min(rq->capacity) */ unsigned int max_load_scale_factor = 1024; /* max possible load scale factor */ unsigned int max_possible_capacity = 1024; /* max(rq->max_possible_capacity) */ /* Mask of all CPUs that have max_possible_capacity */ cpumask_t mpc_mask = CPU_MASK_ALL; /* Window size (in ns) */ __read_mostly unsigned int sched_ravg_window = 10000000; Loading Loading @@ -2571,8 +2574,13 @@ static int cpufreq_notifier_policy(struct notifier_block *nb, mplsf = div_u64(((u64) rq->load_scale_factor) * rq->max_possible_freq, rq->max_freq); if (mpc > highest_mpc) if (mpc > highest_mpc) { highest_mpc = mpc; cpumask_clear(&mpc_mask); cpumask_set_cpu(i, &mpc_mask); } else if (mpc == highest_mpc) { cpumask_set_cpu(i, &mpc_mask); } if (mplsf > highest_mplsf) highest_mplsf = mplsf; Loading
kernel/sched/fair.c +204 −161 Original line number Diff line number Diff line Loading @@ -2709,21 +2709,6 @@ int sched_get_cpu_mostly_idle_nr_run(int cpu) return rq->mostly_idle_nr_run; } /* * 'load' is in reference to "best cpu" at its best frequency. * Scale that in reference to a given cpu, accounting for how bad it is * in reference to "best cpu". */ u64 scale_load_to_cpu(u64 task_load, int cpu) { struct rq *rq = cpu_rq(cpu); task_load *= (u64)rq->load_scale_factor; task_load /= 1024; return task_load; } #ifdef CONFIG_CGROUP_SCHED static inline int upmigrate_discouraged(struct task_struct *p) Loading Loading @@ -2793,11 +2778,9 @@ static inline u64 cpu_load_sync(int cpu, int sync) } static int spill_threshold_crossed(struct task_struct *p, struct rq *rq, int cpu, int sync) spill_threshold_crossed(u64 task_load, u64 cpu_load, struct rq *rq) { u64 total_load = cpu_load_sync(cpu, sync) + scale_load_to_cpu(task_load(p), cpu); u64 total_load = task_load + cpu_load; if (total_load > sched_spill_load || (rq->nr_running + 1) > sysctl_sched_spill_nr_run) Loading @@ -2815,10 +2798,9 @@ int mostly_idle_cpu(int cpu) && !sched_cpu_high_irqload(cpu); } static int mostly_idle_cpu_sync(int cpu, int sync) static int mostly_idle_cpu_sync(int cpu, u64 load, int sync) { struct rq *rq = cpu_rq(cpu); u64 load = cpu_load_sync(cpu, sync); int nr_running; nr_running = rq->nr_running; Loading Loading @@ -2916,14 +2898,12 @@ done: * sched_downmigrate. This will help avoid frequenty migrations for * tasks with load close to the upmigrate threshold */ static int task_will_fit(struct task_struct *p, int cpu) static int task_load_will_fit(struct task_struct *p, u64 task_load, int cpu) { u64 load; int prev_cpu = task_cpu(p); struct rq *prev_rq = cpu_rq(prev_cpu); struct rq *prev_rq = cpu_rq(task_cpu(p)); struct rq *rq = cpu_rq(cpu); int upmigrate = sched_upmigrate; int nice = task_nice(p); int upmigrate, nice; if (rq->capacity == max_capacity) return 1; Loading @@ -2932,33 +2912,40 @@ static int task_will_fit(struct task_struct *p, int cpu) if (rq->capacity > prev_rq->capacity) return 1; } else { nice = task_nice(p); if (nice > sched_upmigrate_min_nice || upmigrate_discouraged(p)) return 1; load = scale_load_to_cpu(task_load(p), cpu); upmigrate = sched_upmigrate; if (prev_rq->capacity > rq->capacity) upmigrate = sched_downmigrate; if (load < upmigrate) if (task_load < upmigrate) return 1; } return 0; } static int eligible_cpu(struct task_struct *p, int cpu, int sync) static int task_will_fit(struct task_struct *p, int cpu) { u64 tload = scale_load_to_cpu(task_load(p), cpu); return task_load_will_fit(p, tload, cpu); } static int eligible_cpu(u64 task_load, u64 cpu_load, int cpu, int sync) { struct rq *rq = cpu_rq(cpu); if (sched_cpu_high_irqload(cpu)) return 0; if (mostly_idle_cpu_sync(cpu, sync)) if (mostly_idle_cpu_sync(cpu, cpu_load, sync)) return 1; if (rq->max_possible_capacity != max_possible_capacity) return !spill_threshold_crossed(p, rq, cpu, sync); return !spill_threshold_crossed(task_load, cpu_load, rq); return 0; } Loading Loading @@ -3013,22 +3000,23 @@ unsigned int power_cost_at_freq(int cpu, unsigned int freq) /* Return the cost of running task p on CPU cpu. This function * currently assumes that task p is the only task which will run on * the CPU. */ static unsigned int power_cost(struct task_struct *p, int cpu) static unsigned int power_cost(u64 task_load, int cpu) { unsigned int task_freq, cur_freq; struct rq *rq = cpu_rq(cpu); u64 demand; unsigned int task_freq; unsigned int cur_freq = cpu_rq(cpu)->cur_freq; if (!sysctl_sched_enable_power_aware) return cpu_rq(cpu)->max_possible_capacity; return rq->max_possible_capacity; /* calculate % of max freq needed */ demand = scale_load_to_cpu(task_load(p), cpu) * 100; demand = task_load * 100; demand = div64_u64(demand, max_task_load()); task_freq = demand * cpu_rq(cpu)->max_possible_freq; task_freq = demand * rq->max_possible_freq; task_freq /= 100; /* khz needed */ cur_freq = rq->cur_freq; task_freq = max(cur_freq, task_freq); return power_cost_at_freq(cpu, task_freq); Loading @@ -3036,102 +3024,103 @@ static unsigned int power_cost(struct task_struct *p, int cpu) static int best_small_task_cpu(struct task_struct *p, int sync) { int best_busy_cpu = -1, best_fallback_cpu = -1; int best_mi_cpu = -1; int min_cost_cpu = -1, min_cstate_cpu = -1; int best_busy_cpu = -1, fallback_cpu = -1; int min_cstate_cpu = -1; int min_cstate = INT_MAX; int min_fallback_cpu_cost = INT_MAX; int min_cost = INT_MAX; int i, cstate, cpu_cost; u64 load, min_busy_load = ULLONG_MAX; int cost_list[nr_cpu_ids]; int prev_cpu = task_cpu(p); struct cpumask search_cpus; int cpu_cost, min_cost = INT_MAX; int i = task_cpu(p), prev_cpu; int hmp_capable; u64 tload, cpu_load, min_load = ULLONG_MAX; cpumask_t temp; cpumask_t search_cpu; cpumask_t fb_search_cpu = CPU_MASK_NONE; struct rq *rq; cpumask_and(&search_cpus, tsk_cpus_allowed(p), cpu_online_mask); cpumask_and(&temp, &mpc_mask, cpu_possible_mask); hmp_capable = !cpumask_full(&temp); if (cpumask_empty(&search_cpus)) return prev_cpu; cpumask_and(&search_cpu, tsk_cpus_allowed(p), cpu_online_mask); if (unlikely(!cpumask_test_cpu(i, &search_cpu))) i = cpumask_first(&search_cpu); /* Take a first pass to find the lowest power cost CPU. This will avoid a potential O(n^2) search */ for_each_cpu(i, &search_cpus) { do { rq = cpu_rq(i); trace_sched_cpu_load(cpu_rq(i), idle_cpu(i), mostly_idle_cpu_sync(i, sync), sched_irqload(i), power_cost(p, i), cpu_temp(i)); cpumask_clear_cpu(i, &search_cpu); cpu_cost = power_cost(p, i); if (cpu_cost < min_cost || (cpu_cost == min_cost && i == prev_cpu)) { min_cost = cpu_cost; min_cost_cpu = i; } trace_sched_cpu_load(rq, idle_cpu(i), mostly_idle_cpu_sync(i, cpu_load_sync(i, sync), sync), sched_irqload(i), power_cost(scale_load_to_cpu(task_load(p), i), i), cpu_temp(i)); cost_list[i] = cpu_cost; if (rq->max_possible_capacity == max_possible_capacity && hmp_capable) { cpumask_and(&fb_search_cpu, &search_cpu, &rq->freq_domain_cpumask); cpumask_andnot(&search_cpu, &search_cpu, &rq->freq_domain_cpumask); continue; } /* * Optimization to steer task towards the minimum power cost * CPU if it's the task's previous CPU. The tradeoff is that * we may have to check the same information again in pass 2. */ if (!cpu_rq(min_cost_cpu)->cstate && mostly_idle_cpu_sync(min_cost_cpu, sync) && !sched_cpu_high_irqload(min_cost_cpu) && min_cost_cpu == prev_cpu) return min_cost_cpu; for_each_cpu(i, &search_cpus) { struct rq *rq = cpu_rq(i); cstate = rq->cstate; if (power_delta_exceeded(cost_list[i], min_cost)) { if (cost_list[i] < min_fallback_cpu_cost || (cost_list[i] == min_fallback_cpu_cost && i == prev_cpu)) { best_fallback_cpu = i; min_fallback_cpu_cost = cost_list[i]; } if (sched_cpu_high_irqload(i)) continue; } if (idle_cpu(i) && cstate && !sched_cpu_high_irqload(i)) { if (cstate < min_cstate || (cstate == min_cstate && i == prev_cpu)) { if (idle_cpu(i) && rq->cstate) { if (rq->cstate < min_cstate) { min_cstate_cpu = i; min_cstate = cstate; min_cstate = rq->cstate; } continue; } if (mostly_idle_cpu_sync(i, sync) && !sched_cpu_high_irqload(i)) { if (best_mi_cpu == -1 || i == prev_cpu) best_mi_cpu = i; cpu_load = cpu_load_sync(i, sync); if (mostly_idle_cpu_sync(i, cpu_load, sync)) return i; } while ((i = cpumask_first(&search_cpu)) < nr_cpu_ids); if (min_cstate_cpu != -1) return min_cstate_cpu; cpumask_and(&search_cpu, tsk_cpus_allowed(p), cpu_online_mask); cpumask_andnot(&search_cpu, &search_cpu, &fb_search_cpu); for_each_cpu(i, &search_cpu) { rq = cpu_rq(i); prev_cpu = (i == task_cpu(p)); if (sched_cpu_high_irqload(i)) continue; } load = cpu_load_sync(i, sync); if (!spill_threshold_crossed(p, rq, i, sync)) { if (load < min_busy_load || (load == min_busy_load && i == prev_cpu)) { min_busy_load = load; tload = scale_load_to_cpu(task_load(p), i); cpu_load = cpu_load_sync(i, sync); if (!spill_threshold_crossed(tload, cpu_load, rq)) { if (cpu_load < min_load || (prev_cpu && cpu_load == min_load)) { min_load = cpu_load; best_busy_cpu = i; } } } if (best_mi_cpu != -1) return best_mi_cpu; if (min_cstate_cpu != -1) return min_cstate_cpu; if (best_busy_cpu != -1) return best_busy_cpu; return best_fallback_cpu; for_each_cpu(i, &fb_search_cpu) { rq = cpu_rq(i); prev_cpu = (i == task_cpu(p)); tload = scale_load_to_cpu(task_load(p), i); cpu_cost = power_cost(tload, i); if (cpu_cost < min_cost || (prev_cpu && cpu_cost == min_cost)) { fallback_cpu = i; min_cost = cpu_cost; } } return fallback_cpu; } #define UP_MIGRATION 1 Loading @@ -3139,38 +3128,58 @@ static int best_small_task_cpu(struct task_struct *p, int sync) #define EA_MIGRATION 3 #define IRQLOAD_MIGRATION 4 static int skip_cpu(struct task_struct *p, int cpu, int reason) static int skip_freq_domain(struct rq *task_rq, struct rq *rq, int reason) { struct rq *rq = cpu_rq(cpu); struct rq *task_rq = task_rq(p); int skip = 0; int skip; if (!reason) return 0; if (is_reserved(cpu)) return 1; switch (reason) { case UP_MIGRATION: skip = (rq->capacity <= task_rq->capacity); skip = rq->capacity <= task_rq->capacity; break; case DOWN_MIGRATION: skip = (rq->capacity >= task_rq->capacity); skip = rq->capacity >= task_rq->capacity; break; case EA_MIGRATION: skip = rq->capacity < task_rq->capacity || power_cost(p, cpu) > power_cost(p, task_cpu(p)); skip = rq->capacity != task_rq->capacity; break; case IRQLOAD_MIGRATION: /* Purposely fall through */ default: skip = (cpu == task_cpu(p)); return 0; } return skip; } static int skip_cpu(struct rq *task_rq, struct rq *rq, int cpu, u64 task_load, int reason) { int skip; if (!reason) return 0; if (is_reserved(cpu)) return 1; switch (reason) { case EA_MIGRATION: skip = power_cost(task_load, cpu) > power_cost(task_load, cpu_of(task_rq)); break; case IRQLOAD_MIGRATION: /* Purposely fall through */ default: skip = (rq == task_rq); } return skip; Loading Loading @@ -3200,7 +3209,7 @@ static int select_packing_target(struct task_struct *p, int best_cpu) /* Pick the first lowest power cpu as target */ for_each_cpu(i, &search_cpus) { int cost = power_cost(p, i); int cost = power_cost(scale_load_to_cpu(task_load(p), i), i); if (cost < min_cost && !sched_cpu_high_irqload(i)) { target = i; Loading Loading @@ -3230,17 +3239,19 @@ static inline int wake_to_idle(struct task_struct *p) static int select_best_cpu(struct task_struct *p, int target, int reason, int sync) { int i, best_cpu = -1, fallback_idle_cpu = -1, min_cstate_cpu = -1; int prev_cpu = task_cpu(p); int i, j, prev_cpu, best_cpu = -1; int fallback_idle_cpu = -1, min_cstate_cpu = -1; int cpu_cost, min_cost = INT_MAX; int min_idle_cost = INT_MAX, min_busy_cost = INT_MAX; u64 load, min_load = ULLONG_MAX, min_fallback_load = ULLONG_MAX; u64 tload, cpu_load; u64 min_load = ULLONG_MAX, min_fallback_load = ULLONG_MAX; int small_task = is_small_task(p); int boost = sched_boost(); int cstate, min_cstate = INT_MAX; int prefer_idle = -1; int curr_cpu = smp_processor_id(); int prefer_idle_override = 0; cpumask_t search_cpus; struct rq *trq; if (reason) { prefer_idle = 1; Loading @@ -3264,33 +3275,52 @@ static int select_best_cpu(struct task_struct *p, int target, int reason, goto done; } /* Todo : Optimize this loop */ for_each_cpu_and(i, tsk_cpus_allowed(p), cpu_online_mask) { trq = task_rq(p); cpumask_and(&search_cpus, tsk_cpus_allowed(p), cpu_online_mask); for_each_cpu(i, &search_cpus) { struct rq *rq = cpu_rq(i); trace_sched_cpu_load(cpu_rq(i), idle_cpu(i), mostly_idle_cpu_sync(i, sync), sched_irqload(i), power_cost(p, i), mostly_idle_cpu_sync(i, cpu_load_sync(i, sync), sync), sched_irqload(i), power_cost(scale_load_to_cpu(task_load(p), i), i), cpu_temp(i)); if (skip_cpu(p, i, reason)) if (skip_freq_domain(trq, rq, reason)) { cpumask_andnot(&search_cpus, &search_cpus, &rq->freq_domain_cpumask); continue; } tload = scale_load_to_cpu(task_load(p), i); if (skip_cpu(trq, rq, i, tload, reason)) continue; prev_cpu = (i == task_cpu(p)); /* * The least-loaded mostly-idle CPU where the task * won't fit is our fallback if we can't find a CPU * where the task will fit. */ if (!task_will_fit(p, i)) { if (mostly_idle_cpu_sync(i, sync) && !sched_cpu_high_irqload(i)) { load = cpu_load_sync(i, sync); if (load < min_fallback_load || (load == min_fallback_load && i == prev_cpu)) { min_fallback_load = load; fallback_idle_cpu = i; if (!task_load_will_fit(p, tload, i)) { for_each_cpu_and(j, &search_cpus, &rq->freq_domain_cpumask) { cpu_load = cpu_load_sync(j, sync); if (mostly_idle_cpu_sync(j, cpu_load, sync) && !sched_cpu_high_irqload(j)) { if (cpu_load < min_fallback_load || (cpu_load == min_fallback_load && j == task_cpu(p))) { min_fallback_load = cpu_load; fallback_idle_cpu = j; } } } cpumask_andnot(&search_cpus, &search_cpus, &rq->freq_domain_cpumask); continue; } Loading @@ -3298,7 +3328,8 @@ static int select_best_cpu(struct task_struct *p, int target, int reason, if (prefer_idle == -1) prefer_idle = cpu_rq(i)->prefer_idle; if (!eligible_cpu(p, i, sync)) cpu_load = cpu_load_sync(i, sync); if (!eligible_cpu(tload, cpu_load, i, sync)) continue; /* Loading @@ -3307,9 +3338,7 @@ static int select_best_cpu(struct task_struct *p, int target, int reason, * spill. */ load = cpu_load_sync(i, sync); cpu_cost = power_cost(p, i); cstate = cpu_rq(i)->cstate; cpu_cost = power_cost(tload, i); /* * If the task fits in a CPU in a lower power band, that Loading Loading @@ -3337,8 +3366,10 @@ static int select_best_cpu(struct task_struct *p, int target, int reason, * prefer_idle is set. Otherwise if prefer_idle is unset sync * wakeups will get biased away from the waker CPU. */ if (idle_cpu(i) || (sync && i == curr_cpu && prefer_idle && cpu_rq(i)->nr_running == 1)) { if (idle_cpu(i) || (sync && i == smp_processor_id() && prefer_idle && cpu_rq(i)->nr_running == 1)) { cstate = cpu_rq(i)->cstate; if (cstate > min_cstate) continue; Loading @@ -3350,7 +3381,7 @@ static int select_best_cpu(struct task_struct *p, int target, int reason, } if (cpu_cost < min_idle_cost || (cpu_cost == min_idle_cost && i == prev_cpu)) { (prev_cpu && cpu_cost == min_idle_cost)) { min_idle_cost = cpu_cost; min_cstate_cpu = i; } Loading @@ -3362,11 +3393,11 @@ static int select_best_cpu(struct task_struct *p, int target, int reason, * For CPUs that are not completely idle, pick one with the * lowest load and break ties with power cost */ if (load > min_load) if (cpu_load > min_load) continue; if (load < min_load) { min_load = load; if (cpu_load < min_load) { min_load = cpu_load; min_busy_cost = cpu_cost; best_cpu = i; continue; Loading @@ -3378,7 +3409,7 @@ static int select_best_cpu(struct task_struct *p, int target, int reason, * more power efficient CPU option. */ if (cpu_cost < min_busy_cost || (cpu_cost == min_busy_cost && i == prev_cpu)) { (prev_cpu && cpu_cost == min_busy_cost)) { min_busy_cost = cpu_cost; best_cpu = i; } Loading @@ -3389,7 +3420,7 @@ static int select_best_cpu(struct task_struct *p, int target, int reason, * best_cpu cannot have high irq load. */ if (min_cstate_cpu >= 0 && (prefer_idle > 0 || best_cpu < 0 || !mostly_idle_cpu_sync(best_cpu, sync))) !mostly_idle_cpu_sync(best_cpu, min_load, sync))) best_cpu = min_cstate_cpu; done: if (best_cpu < 0) { Loading @@ -3399,7 +3430,7 @@ done: * prev_cpu. We may just benefit from having * a hot cache. */ best_cpu = prev_cpu; best_cpu = task_cpu(p); else best_cpu = fallback_idle_cpu; } Loading Loading @@ -3900,15 +3931,27 @@ static int lower_power_cpu_available(struct task_struct *p, int cpu) { int i; int lowest_power_cpu = task_cpu(p); int lowest_power = power_cost(p, task_cpu(p)); int lowest_power = power_cost(scale_load_to_cpu(task_load(p), lowest_power_cpu), lowest_power_cpu); struct cpumask search_cpus; struct rq *rq = cpu_rq(cpu); /* * This function should be called only when task 'p' fits in the current * CPU which can be ensured by task_will_fit() prior to this. */ cpumask_and(&search_cpus, tsk_cpus_allowed(p), cpu_online_mask); cpumask_and(&search_cpus, &search_cpus, &rq->freq_domain_cpumask); cpumask_clear_cpu(lowest_power_cpu, &search_cpus); /* Is a lower-powered idle CPU available which will fit this task? */ for_each_cpu_and(i, tsk_cpus_allowed(p), cpu_online_mask) { if (idle_cpu(i) && task_will_fit(p, i)) { int idle_power_cost = power_cost(p, i); if (idle_power_cost < lowest_power) { for_each_cpu(i, &search_cpus) { if (idle_cpu(i)) { int cost = power_cost(scale_load_to_cpu(task_load(p), i), i); if (cost < lowest_power) { lowest_power_cpu = i; lowest_power = idle_power_cost; lowest_power = cost; } } } Loading Loading @@ -4072,13 +4115,13 @@ static inline int find_new_hmp_ilb(int type) return 0; } static inline int power_cost(struct task_struct *p, int cpu) static inline int power_cost(u64 task_load, int cpu) { return SCHED_CAPACITY_SCALE; } static inline int spill_threshold_crossed(struct task_struct *p, struct rq *rq, int cpu, int sync) spill_threshold_crossed(u64 task_load, u64 cpu_load, struct rq *rq) { return 0; } Loading
kernel/sched/sched.h +16 −1 Original line number Diff line number Diff line Loading @@ -901,6 +901,7 @@ extern unsigned int max_capacity; extern unsigned int min_capacity; extern unsigned int max_load_scale_factor; extern unsigned int max_possible_capacity; extern cpumask_t mpc_mask; extern unsigned long capacity_scale_cpu_efficiency(int cpu); extern unsigned long capacity_scale_cpu_freq(int cpu); extern unsigned int sched_mostly_idle_load; Loading @@ -909,7 +910,6 @@ extern unsigned int sched_upmigrate; extern unsigned int sched_downmigrate; extern unsigned int sched_init_task_load_pelt; extern unsigned int sched_init_task_load_windows; extern u64 scale_load_to_cpu(u64 load, int cpu); extern unsigned int sched_heavy_task; extern unsigned int up_down_migrate_scale_factor; extern void reset_cpu_hmp_stats(int cpu, int reset_cra); Loading @@ -921,6 +921,21 @@ unsigned int cpu_temp(int cpu); extern unsigned int nr_eligible_big_tasks(int cpu); extern void update_up_down_migrate(void); /* * 'load' is in reference to "best cpu" at its best frequency. * Scale that in reference to a given cpu, accounting for how bad it is * in reference to "best cpu". */ static inline u64 scale_load_to_cpu(u64 task_load, int cpu) { struct rq *rq = cpu_rq(cpu); task_load *= (u64)rq->load_scale_factor; task_load /= 1024; return task_load; } static inline int capacity(struct rq *rq) { return rq->capacity; Loading