Loading include/linux/interrupt.h +7 −0 Original line number Diff line number Diff line Loading @@ -528,6 +528,12 @@ enum }; #define SOFTIRQ_STOP_IDLE_MASK (~(1 << RCU_SOFTIRQ)) /* Softirq's where the handling might be long: */ #define LONG_SOFTIRQ_MASK ((1 << NET_TX_SOFTIRQ) | \ (1 << NET_RX_SOFTIRQ) | \ (1 << BLOCK_SOFTIRQ) | \ (1 << IRQ_POLL_SOFTIRQ) | \ (1 << TASKLET_SOFTIRQ)) /* map softirq index to softirq name. update 'softirq_to_name' in * kernel/softirq.c when adding a new softirq. Loading Loading @@ -563,6 +569,7 @@ extern void raise_softirq_irqoff(unsigned int nr); extern void raise_softirq(unsigned int nr); DECLARE_PER_CPU(struct task_struct *, ksoftirqd); DECLARE_PER_CPU(__u32, active_softirqs); static inline struct task_struct *this_cpu_ksoftirqd(void) { Loading include/linux/sched.h +5 −0 Original line number Diff line number Diff line Loading @@ -1721,6 +1721,7 @@ extern int task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_ #ifdef CONFIG_SMP extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask); extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask); extern bool cpupri_check_rt(void); #else static inline void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) { Loading @@ -1731,6 +1732,10 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p, const struct cpuma return -EINVAL; return 0; } static inline bool cpupri_check_rt(void) { return false; } #endif extern int yield_to(struct task_struct *p, bool preempt); Loading kernel/sched/cpupri.c +44 −2 Original line number Diff line number Diff line Loading @@ -41,6 +41,27 @@ static int convert_prio(int prio) return cpupri; } /** * drop_nopreempt_cpus - remove a cpu from the mask if it is likely * non-preemptible * @lowest_mask: mask with selected CPUs (non-NULL) */ static void drop_nopreempt_cpus(struct cpumask *lowest_mask) { unsigned int cpu = cpumask_first(lowest_mask); while (cpu < 
nr_cpu_ids) { /* unlocked access */ struct task_struct *task = READ_ONCE(cpu_rq(cpu)->curr); if (task_may_not_preempt(task, cpu)) cpumask_clear_cpu(cpu, lowest_mask); cpu = cpumask_next(cpu, lowest_mask); } } /** * cpupri_find - find the best (lowest-pri) CPU in the system * @cp: The cpupri context Loading @@ -61,9 +82,11 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p, { int idx = 0; int task_pri = convert_prio(p->prio); bool drop_nopreempts = task_pri <= MAX_RT_PRIO; BUG_ON(task_pri >= CPUPRI_NR_PRIORITIES); retry: for (idx = 0; idx < task_pri; idx++) { struct cpupri_vec *vec = &cp->pri_to_cpu[idx]; int skip = 0; Loading Loading @@ -99,7 +122,8 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p, if (lowest_mask) { cpumask_and(lowest_mask, p->cpus_ptr, vec->mask); if (drop_nopreempts) drop_nopreempt_cpus(lowest_mask); /* * We have to ensure that we have at least one bit * still set in the array, since the map could have Loading @@ -114,7 +138,14 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p, return 1; } /* * If we can't find any non-preemptible cpu's, retry so we can * find the lowest priority target and avoid priority inversion. */ if (drop_nopreempts) { drop_nopreempts = false; goto retry; } return 0; } Loading Loading @@ -235,3 +266,14 @@ void cpupri_cleanup(struct cpupri *cp) for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) free_cpumask_var(cp->pri_to_cpu[i].mask); } /* * cpupri_check_rt - check if CPU has a RT task * should be called from rcu-sched read section. 
*/ bool cpupri_check_rt(void) { int cpu = raw_smp_processor_id(); return cpu_rq(cpu)->rd->cpupri.cpu_to_pri[cpu] > CPUPRI_NORMAL; } kernel/sched/rt.c +231 −9 Original line number Diff line number Diff line Loading @@ -6,6 +6,9 @@ #include "sched.h" #include "pelt.h" #include <linux/interrupt.h> #include "walt.h" int sched_rr_timeslice = RR_TIMESLICE; Loading Loading @@ -910,6 +913,66 @@ static inline int rt_se_prio(struct sched_rt_entity *rt_se) return rt_task_of(rt_se)->prio; } static void dump_throttled_rt_tasks(struct rt_rq *rt_rq) { struct rt_prio_array *array = &rt_rq->active; struct sched_rt_entity *rt_se; char buf[500]; char *pos = buf; char *end = buf + sizeof(buf); int idx; struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); pos += snprintf(pos, sizeof(buf), "sched: RT throttling activated for rt_rq %pK (cpu %d)\n", rt_rq, cpu_of(rq_of_rt_rq(rt_rq))); pos += snprintf(pos, end - pos, "rt_period_timer: expires=%lld now=%llu period=%llu\n", hrtimer_get_expires_ns(&rt_b->rt_period_timer), ktime_get_ns(), sched_rt_period(rt_rq)); if (bitmap_empty(array->bitmap, MAX_RT_PRIO)) goto out; pos += snprintf(pos, end - pos, "potential CPU hogs:\n"); #ifdef CONFIG_SCHED_INFO if (sched_info_on()) pos += snprintf(pos, end - pos, "current %s (%d) is running for %llu nsec\n", current->comm, current->pid, rq_clock(rq_of_rt_rq(rt_rq)) - current->sched_info.last_arrival); #endif idx = sched_find_first_bit(array->bitmap); while (idx < MAX_RT_PRIO) { list_for_each_entry(rt_se, array->queue + idx, run_list) { struct task_struct *p; if (!rt_entity_is_task(rt_se)) continue; p = rt_task_of(rt_se); if (pos < end) pos += snprintf(pos, end - pos, "\t%s (%d)\n", p->comm, p->pid); } idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx + 1); } out: #ifdef CONFIG_PANIC_ON_RT_THROTTLING /* * Use pr_err() in the BUG() case since printk_sched() will * not get flushed and deadlock is not a concern. 
*/ pr_err("%s\n", buf); BUG(); #else printk_deferred("%s\n", buf); #endif } static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq) { u64 runtime = sched_rt_runtime(rt_rq); Loading @@ -933,8 +996,14 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq) * but accrue some time due to boosting. */ if (likely(rt_b->rt_runtime)) { static bool once; rt_rq->rt_throttled = 1; printk_deferred_once("sched: RT throttling activated\n"); if (!once) { once = true; dump_throttled_rt_tasks(rt_rq); } } else { /* * In case we did anyway, make it go away, Loading Loading @@ -1339,6 +1408,7 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags) rt_se->timeout = 0; enqueue_rt_entity(rt_se, flags); walt_inc_cumulative_runnable_avg(rq, p); if (!task_current(rq, p) && p->nr_cpus_allowed > 1) enqueue_pushable_task(rq, p); Loading @@ -1350,6 +1420,7 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags) update_curr_rt(rq); dequeue_rt_entity(rt_se, flags); walt_dec_cumulative_runnable_avg(rq, p); dequeue_pushable_task(rq, p); } Loading Loading @@ -1391,11 +1462,30 @@ static void yield_task_rt(struct rq *rq) #ifdef CONFIG_SMP static int find_lowest_rq(struct task_struct *task); /* * Return whether the task on the given cpu is currently non-preemptible * while handling a potentially long softint, or if the task is likely * to block preemptions soon because it is a ksoftirq thread that is * handling slow softints. 
*/ bool task_may_not_preempt(struct task_struct *task, int cpu) { __u32 softirqs = per_cpu(active_softirqs, cpu) | __IRQ_STAT(cpu, __softirq_pending); struct task_struct *cpu_ksoftirqd = per_cpu(ksoftirqd, cpu); return ((softirqs & LONG_SOFTIRQ_MASK) && (task == cpu_ksoftirqd || task_thread_info(task)->preempt_count & SOFTIRQ_MASK)); } static int select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags) { struct task_struct *curr; struct rq *rq; bool may_not_preempt; /* For anything but wake ups, just return the task_cpu */ if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK) Loading @@ -1407,7 +1497,17 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags) curr = READ_ONCE(rq->curr); /* unlocked access */ /* * If the current task on @p's runqueue is an RT task, then * If the current task on @p's runqueue is a softirq task, * it may run without preemption for a time that is * ill-suited for a waiting RT task. Therefore, try to * wake this RT task on another runqueue. * * Also, if the current task on @p's runqueue is an RT task, then * it may run without preemption for a time that is * ill-suited for a waiting RT task. Therefore, try to * wake this RT task on another runqueue. * * Also, if the current task on @p's runqueue is an RT task, then * try to see if we can wake this RT task up on another * runqueue. Otherwise simply start this RT task * on its current runqueue. Loading @@ -1428,17 +1528,22 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags) * This test is optimistic, if we get it wrong the load-balancer * will have to sort it out. 
*/ if (curr && unlikely(rt_task(curr)) && may_not_preempt = task_may_not_preempt(curr, cpu); if (sched_energy_enabled() || may_not_preempt || (unlikely(rt_task(curr)) && (curr->nr_cpus_allowed < 2 || curr->prio <= p->prio)) { curr->prio <= p->prio))) { int target = find_lowest_rq(p); /* * Don't bother moving it if the destination CPU is * not running a lower priority task. * If cpu is non-preemptible, prefer remote cpu * even if it's running a higher-prio task. * Otherwise: Don't bother moving it if the * destination CPU is not running a lower priority task. */ if (target != -1 && p->prio < cpu_rq(target)->rt.highest_prio.curr) (may_not_preempt || p->prio < cpu_rq(target)->rt.highest_prio.curr)) cpu = target; } rcu_read_unlock(); Loading Loading @@ -1634,12 +1739,119 @@ static struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu) static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask); #ifdef CONFIG_SCHED_WALT static int rt_energy_aware_wake_cpu(struct task_struct *task) { struct sched_domain *sd; struct sched_group *sg; struct cpumask *lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask); int cpu, best_cpu = -1; unsigned long best_capacity = ULONG_MAX; unsigned long util, best_cpu_util = ULONG_MAX; unsigned long best_cpu_util_cum = ULONG_MAX; unsigned long util_cum; unsigned long tutil = task_util(task); int best_cpu_idle_idx = INT_MAX; int cpu_idle_idx = -1; bool boost_on_big = rt_boost_on_big(); rcu_read_lock(); cpu = cpu_rq(smp_processor_id())->rd->min_cap_orig_cpu; if (cpu < 0) goto unlock; sd = rcu_dereference(*per_cpu_ptr(&sd_asym_cpucapacity, cpu)); if (!sd) goto unlock; retry: sg = sd->groups; do { int fcpu = group_first_cpu(sg); int capacity_orig = capacity_orig_of(fcpu); if (boost_on_big) { if (is_min_capacity_cpu(fcpu)) continue; } else { if (capacity_orig > best_capacity) continue; } for_each_cpu_and(cpu, lowest_mask, sched_group_span(sg)) { if (cpu_isolated(cpu)) continue; if (sched_cpu_high_irqload(cpu)) continue; util = 
cpu_util(cpu); if (__cpu_overutilized(cpu, util + tutil)) continue; /* Find the least loaded CPU */ if (util > best_cpu_util) continue; /* * If the previous CPU has same load, keep it as * best_cpu. */ if (best_cpu_util == util && best_cpu == task_cpu(task)) continue; /* * If candidate CPU is the previous CPU, select it. * Otherwise, if its load is same with best_cpu and in * a shallower C-state, select it. If all above * conditions are same, select the least cumulative * window demand CPU. */ cpu_idle_idx = idle_get_state_idx(cpu_rq(cpu)); util_cum = cpu_util_cum(cpu, 0); if (cpu != task_cpu(task) && best_cpu_util == util) { if (best_cpu_idle_idx < cpu_idle_idx) continue; if (best_cpu_idle_idx == cpu_idle_idx && best_cpu_util_cum < util_cum) continue; } best_cpu_idle_idx = cpu_idle_idx; best_cpu_util_cum = util_cum; best_cpu_util = util; best_cpu = cpu; best_capacity = capacity_orig; } } while (sg = sg->next, sg != sd->groups); if (unlikely(boost_on_big) && best_cpu == -1) { boost_on_big = false; goto retry; } unlock: rcu_read_unlock(); return best_cpu; } #else static inline int rt_energy_aware_wake_cpu(struct task_struct *task) { return -1; } #endif static int find_lowest_rq(struct task_struct *task) { struct sched_domain *sd; struct cpumask *lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask); int this_cpu = smp_processor_id(); int cpu = task_cpu(task); int cpu = -1; /* Make sure the mask is initialized first */ if (unlikely(!lowest_mask)) Loading @@ -1651,6 +1863,12 @@ static int find_lowest_rq(struct task_struct *task) if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask)) return -1; /* No targets found */ if (sched_energy_enabled()) cpu = rt_energy_aware_wake_cpu(task); if (cpu == -1) cpu = task_cpu(task); /* * At this point we have built a mask of CPUs representing the * lowest priority tasks in the system. 
Now we want to elect Loading Loading @@ -1857,7 +2075,9 @@ static int push_rt_task(struct rq *rq) } deactivate_task(rq, next_task, 0); next_task->on_rq = TASK_ON_RQ_MIGRATING; set_task_cpu(next_task, lowest_rq->cpu); next_task->on_rq = TASK_ON_RQ_QUEUED; activate_task(lowest_rq, next_task, 0); ret = 1; Loading Loading @@ -2129,7 +2349,9 @@ static void pull_rt_task(struct rq *this_rq) resched = true; deactivate_task(src_rq, p, 0); p->on_rq = TASK_ON_RQ_MIGRATING; set_task_cpu(p, this_cpu); p->on_rq = TASK_ON_RQ_QUEUED; activate_task(this_rq, p, 0); /* * We continue with the search, just in Loading kernel/sched/sched.h +5 −0 Original line number Diff line number Diff line Loading @@ -2460,6 +2460,11 @@ extern void set_rq_online (struct rq *rq); extern void set_rq_offline(struct rq *rq); extern bool sched_smp_initialized; /* * task_may_not_preempt - check whether a task may not be preemptible soon */ extern bool task_may_not_preempt(struct task_struct *task, int cpu); #else /* CONFIG_SMP */ /* Loading Loading
include/linux/interrupt.h +7 −0 Original line number Diff line number Diff line Loading @@ -528,6 +528,12 @@ enum };
#define SOFTIRQ_STOP_IDLE_MASK (~(1 << RCU_SOFTIRQ))
/*
 * Softirqs whose handling might take a long time.
 *
 * Bitmask with one bit per softirq number (1 << nr): network TX/RX,
 * block, irq_poll and tasklet softirqs.
 */
#define LONG_SOFTIRQ_MASK ((1 << NET_TX_SOFTIRQ) | \
			   (1 << NET_RX_SOFTIRQ) | \
			   (1 << BLOCK_SOFTIRQ) | \
			   (1 << IRQ_POLL_SOFTIRQ) | \
			   (1 << TASKLET_SOFTIRQ))
/* map softirq index to softirq name. update 'softirq_to_name' in * kernel/softirq.c when adding a new softirq. Loading Loading @@ -563,6 +569,7 @@ extern void raise_softirq_irqoff(unsigned int nr); extern void raise_softirq(unsigned int nr); DECLARE_PER_CPU(struct task_struct *, ksoftirqd); DECLARE_PER_CPU(__u32, active_softirqs); static inline struct task_struct *this_cpu_ksoftirqd(void) { Loading
include/linux/sched.h +5 −0 Original line number Diff line number Diff line Loading @@ -1721,6 +1721,7 @@ extern int task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_
#ifdef CONFIG_SMP
extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask);
extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask);
/* SMP build: real implementation is provided out of line. */
extern bool cpupri_check_rt(void);
#else
static inline void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) { Loading @@ -1731,6 +1732,10 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p, const struct cpuma return -EINVAL; return 0; }
/* !CONFIG_SMP stub: unconditionally reports false. */
static inline bool cpupri_check_rt(void)
{
	return false;
}
#endif
extern int yield_to(struct task_struct *p, bool preempt); Loading
kernel/sched/cpupri.c +44 −2 Original line number Diff line number Diff line Loading @@ -41,6 +41,27 @@ static int convert_prio(int prio) return cpupri; } /** * drop_nopreempt_cpus - remove a cpu from the mask if it is likely * non-preemptible * @lowest_mask: mask with selected CPUs (non-NULL) */ static void drop_nopreempt_cpus(struct cpumask *lowest_mask) { unsigned int cpu = cpumask_first(lowest_mask); while (cpu < nr_cpu_ids) { /* unlocked access */ struct task_struct *task = READ_ONCE(cpu_rq(cpu)->curr); if (task_may_not_preempt(task, cpu)) cpumask_clear_cpu(cpu, lowest_mask); cpu = cpumask_next(cpu, lowest_mask); } } /** * cpupri_find - find the best (lowest-pri) CPU in the system * @cp: The cpupri context Loading @@ -61,9 +82,11 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p, { int idx = 0; int task_pri = convert_prio(p->prio); bool drop_nopreempts = task_pri <= MAX_RT_PRIO; BUG_ON(task_pri >= CPUPRI_NR_PRIORITIES); retry: for (idx = 0; idx < task_pri; idx++) { struct cpupri_vec *vec = &cp->pri_to_cpu[idx]; int skip = 0; Loading Loading @@ -99,7 +122,8 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p, if (lowest_mask) { cpumask_and(lowest_mask, p->cpus_ptr, vec->mask); if (drop_nopreempts) drop_nopreempt_cpus(lowest_mask); /* * We have to ensure that we have at least one bit * still set in the array, since the map could have Loading @@ -114,7 +138,14 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p, return 1; } /* * If we can't find any non-preemptible cpu's, retry so we can * find the lowest priority target and avoid priority inversion. */ if (drop_nopreempts) { drop_nopreempts = false; goto retry; } return 0; } Loading Loading @@ -235,3 +266,14 @@ void cpupri_cleanup(struct cpupri *cp) for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) free_cpumask_var(cp->pri_to_cpu[i].mask); } /* * cpupri_check_rt - check if CPU has a RT task * should be called from rcu-sched read section. 
*/
bool cpupri_check_rt(void)
{
	const int this_cpu = raw_smp_processor_id();
	/* cpupri bookkeeping of the root domain this CPU's runqueue points at */
	struct cpupri *cp = &cpu_rq(this_cpu)->rd->cpupri;

	/* true when the level recorded for this CPU is above CPUPRI_NORMAL */
	return cp->cpu_to_pri[this_cpu] > CPUPRI_NORMAL;
}
kernel/sched/rt.c +231 −9 Original line number Diff line number Diff line Loading @@ -6,6 +6,9 @@ #include "sched.h" #include "pelt.h" #include <linux/interrupt.h> #include "walt.h" int sched_rr_timeslice = RR_TIMESLICE; Loading Loading @@ -910,6 +913,66 @@ static inline int rt_se_prio(struct sched_rt_entity *rt_se) return rt_task_of(rt_se)->prio; } static void dump_throttled_rt_tasks(struct rt_rq *rt_rq) { struct rt_prio_array *array = &rt_rq->active; struct sched_rt_entity *rt_se; char buf[500]; char *pos = buf; char *end = buf + sizeof(buf); int idx; struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); pos += snprintf(pos, sizeof(buf), "sched: RT throttling activated for rt_rq %pK (cpu %d)\n", rt_rq, cpu_of(rq_of_rt_rq(rt_rq))); pos += snprintf(pos, end - pos, "rt_period_timer: expires=%lld now=%llu period=%llu\n", hrtimer_get_expires_ns(&rt_b->rt_period_timer), ktime_get_ns(), sched_rt_period(rt_rq)); if (bitmap_empty(array->bitmap, MAX_RT_PRIO)) goto out; pos += snprintf(pos, end - pos, "potential CPU hogs:\n"); #ifdef CONFIG_SCHED_INFO if (sched_info_on()) pos += snprintf(pos, end - pos, "current %s (%d) is running for %llu nsec\n", current->comm, current->pid, rq_clock(rq_of_rt_rq(rt_rq)) - current->sched_info.last_arrival); #endif idx = sched_find_first_bit(array->bitmap); while (idx < MAX_RT_PRIO) { list_for_each_entry(rt_se, array->queue + idx, run_list) { struct task_struct *p; if (!rt_entity_is_task(rt_se)) continue; p = rt_task_of(rt_se); if (pos < end) pos += snprintf(pos, end - pos, "\t%s (%d)\n", p->comm, p->pid); } idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx + 1); } out: #ifdef CONFIG_PANIC_ON_RT_THROTTLING /* * Use pr_err() in the BUG() case since printk_sched() will * not get flushed and deadlock is not a concern. 
*/ pr_err("%s\n", buf); BUG(); #else printk_deferred("%s\n", buf); #endif } static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq) { u64 runtime = sched_rt_runtime(rt_rq); Loading @@ -933,8 +996,14 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq) * but accrue some time due to boosting. */ if (likely(rt_b->rt_runtime)) { static bool once; rt_rq->rt_throttled = 1; printk_deferred_once("sched: RT throttling activated\n"); if (!once) { once = true; dump_throttled_rt_tasks(rt_rq); } } else { /* * In case we did anyway, make it go away, Loading Loading @@ -1339,6 +1408,7 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags) rt_se->timeout = 0; enqueue_rt_entity(rt_se, flags); walt_inc_cumulative_runnable_avg(rq, p); if (!task_current(rq, p) && p->nr_cpus_allowed > 1) enqueue_pushable_task(rq, p); Loading @@ -1350,6 +1420,7 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags) update_curr_rt(rq); dequeue_rt_entity(rt_se, flags); walt_dec_cumulative_runnable_avg(rq, p); dequeue_pushable_task(rq, p); } Loading Loading @@ -1391,11 +1462,30 @@ static void yield_task_rt(struct rq *rq) #ifdef CONFIG_SMP static int find_lowest_rq(struct task_struct *task); /* * Return whether the task on the given cpu is currently non-preemptible * while handling a potentially long softint, or if the task is likely * to block preemptions soon because it is a ksoftirq thread that is * handling slow softints. 
*/ bool task_may_not_preempt(struct task_struct *task, int cpu) { __u32 softirqs = per_cpu(active_softirqs, cpu) | __IRQ_STAT(cpu, __softirq_pending); struct task_struct *cpu_ksoftirqd = per_cpu(ksoftirqd, cpu); return ((softirqs & LONG_SOFTIRQ_MASK) && (task == cpu_ksoftirqd || task_thread_info(task)->preempt_count & SOFTIRQ_MASK)); } static int select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags) { struct task_struct *curr; struct rq *rq; bool may_not_preempt; /* For anything but wake ups, just return the task_cpu */ if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK) Loading @@ -1407,7 +1497,17 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags) curr = READ_ONCE(rq->curr); /* unlocked access */ /* * If the current task on @p's runqueue is an RT task, then * If the current task on @p's runqueue is a softirq task, * it may run without preemption for a time that is * ill-suited for a waiting RT task. Therefore, try to * wake this RT task on another runqueue. * * Also, if the current task on @p's runqueue is an RT task, then * it may run without preemption for a time that is * ill-suited for a waiting RT task. Therefore, try to * wake this RT task on another runqueue. * * Also, if the current task on @p's runqueue is an RT task, then * try to see if we can wake this RT task up on another * runqueue. Otherwise simply start this RT task * on its current runqueue. Loading @@ -1428,17 +1528,22 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags) * This test is optimistic, if we get it wrong the load-balancer * will have to sort it out. 
*/ if (curr && unlikely(rt_task(curr)) && may_not_preempt = task_may_not_preempt(curr, cpu); if (sched_energy_enabled() || may_not_preempt || (unlikely(rt_task(curr)) && (curr->nr_cpus_allowed < 2 || curr->prio <= p->prio)) { curr->prio <= p->prio))) { int target = find_lowest_rq(p); /* * Don't bother moving it if the destination CPU is * not running a lower priority task. * If cpu is non-preemptible, prefer remote cpu * even if it's running a higher-prio task. * Otherwise: Don't bother moving it if the * destination CPU is not running a lower priority task. */ if (target != -1 && p->prio < cpu_rq(target)->rt.highest_prio.curr) (may_not_preempt || p->prio < cpu_rq(target)->rt.highest_prio.curr)) cpu = target; } rcu_read_unlock(); Loading Loading @@ -1634,12 +1739,119 @@ static struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu) static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);
#ifdef CONFIG_SCHED_WALT
/*
 * rt_energy_aware_wake_cpu - pick a wake-up CPU for an RT task from the
 * per-CPU local_cpu_mask.
 * @task: the RT task being placed
 *
 * Walks the sched groups of the sd_asym_cpucapacity domain looked up for the
 * root domain's min_cap_orig_cpu, considering only CPUs that are both in the
 * group and in this CPU's local_cpu_mask. Among those it prefers the lowest
 * cpu_util(), breaking ties in favour of the task's current CPU, then a
 * lower idle-state index, then a lower cpu_util_cum().
 *
 * NOTE(review): the mask is only read here; it is presumably filled in by
 * the caller before this runs - confirm against find_lowest_rq().
 *
 * Return: the selected CPU, or -1 when no domain or no suitable CPU is found.
 */
static int rt_energy_aware_wake_cpu(struct task_struct *task)
{
	struct sched_domain *sd;
	struct sched_group *sg;
	struct cpumask *lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask);
	int cpu, best_cpu = -1;
	unsigned long best_capacity = ULONG_MAX;
	unsigned long util, best_cpu_util = ULONG_MAX;
	unsigned long best_cpu_util_cum = ULONG_MAX;
	unsigned long util_cum;
	unsigned long tutil = task_util(task);
	int best_cpu_idle_idx = INT_MAX;
	int cpu_idle_idx = -1;
	bool boost_on_big = rt_boost_on_big();

	rcu_read_lock();

	cpu = cpu_rq(smp_processor_id())->rd->min_cap_orig_cpu;
	if (cpu < 0)
		goto unlock;

	sd = rcu_dereference(*per_cpu_ptr(&sd_asym_cpucapacity, cpu));
	if (!sd)
		goto unlock;

retry:
	sg = sd->groups;
	do {
		int fcpu = group_first_cpu(sg);
		int capacity_orig = capacity_orig_of(fcpu);

		/*
		 * "continue" inside this do/while jumps to the loop condition,
		 * which is what advances sg to the next group.
		 */
		if (boost_on_big) {
			/* Boosting: leave out groups of minimum-capacity CPUs. */
			if (is_min_capacity_cpu(fcpu))
				continue;
		} else {
			/* Skip groups with more capacity than the current best. */
			if (capacity_orig > best_capacity)
				continue;
		}

		for_each_cpu_and(cpu, lowest_mask, sched_group_span(sg)) {
			if (cpu_isolated(cpu))
				continue;

			if (sched_cpu_high_irqload(cpu))
				continue;

			util = cpu_util(cpu);

			/* Would adding the task's utilization overutilize it? */
			if (__cpu_overutilized(cpu, util + tutil))
				continue;

			/* Find the least loaded CPU */
			if (util > best_cpu_util)
				continue;

			/*
			 * If the previous CPU has same load, keep it as
			 * best_cpu.
			 */
			if (best_cpu_util == util && best_cpu == task_cpu(task))
				continue;

			/*
			 * If candidate CPU is the previous CPU, select it.
			 * Otherwise, if its load is same with best_cpu and in
			 * a shallower C-state, select it. If all above
			 * conditions are same, select the least cumulative
			 * window demand CPU.
			 */
			cpu_idle_idx = idle_get_state_idx(cpu_rq(cpu));

			util_cum = cpu_util_cum(cpu, 0);
			if (cpu != task_cpu(task) && best_cpu_util == util) {
				if (best_cpu_idle_idx < cpu_idle_idx)
					continue;

				if (best_cpu_idle_idx == cpu_idle_idx &&
				    best_cpu_util_cum < util_cum)
					continue;
			}

			/* This candidate becomes the new best. */
			best_cpu_idle_idx = cpu_idle_idx;
			best_cpu_util_cum = util_cum;
			best_cpu_util = util;
			best_cpu = cpu;
			best_capacity = capacity_orig;
		}

	} while (sg = sg->next, sg != sd->groups);

	/* Boosted pass found nothing: retry once without the boost filter. */
	if (unlikely(boost_on_big) && best_cpu == -1) {
		boost_on_big = false;
		goto retry;
	}

unlock:
	rcu_read_unlock();
	return best_cpu;
}
#else
/* !CONFIG_SCHED_WALT stub: never selects a CPU. */
static inline int rt_energy_aware_wake_cpu(struct task_struct *task)
{
	return -1;
}
#endif
static int find_lowest_rq(struct task_struct *task) { struct sched_domain *sd; struct cpumask *lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask); int this_cpu = smp_processor_id(); int cpu = task_cpu(task); int cpu = -1; /* Make sure the mask is initialized first */ if (unlikely(!lowest_mask)) Loading @@ -1651,6 +1863,12 @@ static int find_lowest_rq(struct task_struct *task) if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask)) return -1; /* No targets found */ if (sched_energy_enabled()) cpu = rt_energy_aware_wake_cpu(task); if (cpu == -1) cpu = task_cpu(task); /* * At this point we have built a mask of CPUs representing the * lowest priority tasks in the system.
Now we want to elect Loading Loading @@ -1857,7 +2075,9 @@ static int push_rt_task(struct rq *rq) } deactivate_task(rq, next_task, 0); next_task->on_rq = TASK_ON_RQ_MIGRATING; set_task_cpu(next_task, lowest_rq->cpu); next_task->on_rq = TASK_ON_RQ_QUEUED; activate_task(lowest_rq, next_task, 0); ret = 1; Loading Loading @@ -2129,7 +2349,9 @@ static void pull_rt_task(struct rq *this_rq) resched = true; deactivate_task(src_rq, p, 0); p->on_rq = TASK_ON_RQ_MIGRATING; set_task_cpu(p, this_cpu); p->on_rq = TASK_ON_RQ_QUEUED; activate_task(this_rq, p, 0); /* * We continue with the search, just in Loading
kernel/sched/sched.h +5 −0 Original line number Diff line number Diff line Loading @@ -2460,6 +2460,11 @@ extern void set_rq_online (struct rq *rq); extern void set_rq_offline(struct rq *rq); extern bool sched_smp_initialized; /* * task_may_not_preempt - check whether a task may not be preemptible soon */ extern bool task_may_not_preempt(struct task_struct *task, int cpu); #else /* CONFIG_SMP */ /* Loading