Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit fac6230f authored by qctecmdr's avatar qctecmdr Committed by Gerrit - the friendly Code Review server
Browse files

Merge "softirq: defer softirq processing to ksoftirqd if CPU is busy with RT"

parents 9dd14571 db6940d5
Loading
Loading
Loading
Loading
+7 −0
Original line number Diff line number Diff line
@@ -528,6 +528,12 @@ enum
};

#define SOFTIRQ_STOP_IDLE_MASK (~(1 << RCU_SOFTIRQ))
/* Softirq's where the handling might be long: */
#define LONG_SOFTIRQ_MASK ((1 << NET_TX_SOFTIRQ)       | \
			   (1 << NET_RX_SOFTIRQ)       | \
			   (1 << BLOCK_SOFTIRQ)        | \
			   (1 << IRQ_POLL_SOFTIRQ)     | \
			   (1 << TASKLET_SOFTIRQ))

/* map softirq index to softirq name. update 'softirq_to_name' in
 * kernel/softirq.c when adding a new softirq.
@@ -563,6 +569,7 @@ extern void raise_softirq_irqoff(unsigned int nr);
extern void raise_softirq(unsigned int nr);

DECLARE_PER_CPU(struct task_struct *, ksoftirqd);
DECLARE_PER_CPU(__u32, active_softirqs);

static inline struct task_struct *this_cpu_ksoftirqd(void)
{
+5 −0
Original line number Diff line number Diff line
@@ -1721,6 +1721,7 @@ extern int task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_
#ifdef CONFIG_SMP
extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask);
extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask);
extern bool cpupri_check_rt(void);
#else
static inline void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
{
@@ -1731,6 +1732,10 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p, const struct cpuma
		return -EINVAL;
	return 0;
}
/*
 * !CONFIG_SMP stub: there is no cpupri tracking without SMP, so report
 * that the (only) CPU is not busy with an RT task.
 */
static inline bool cpupri_check_rt(void)
{
	return false;
}
#endif

extern int yield_to(struct task_struct *p, bool preempt);
+44 −2
Original line number Diff line number Diff line
@@ -41,6 +41,27 @@ static int convert_prio(int prio)
	return cpupri;
}

/**
 * drop_nopreempt_cpus - remove a cpu from the mask if it is likely
 *			 non-preemptible
 * @lowest_mask: mask with selected CPUs (non-NULL)
 *
 * Clears from @lowest_mask every CPU whose current task is flagged by
 * task_may_not_preempt() (busy with, or about to handle, long softirq
 * work), so RT placement can steer clear of those CPUs.
 */
static void
drop_nopreempt_cpus(struct cpumask *lowest_mask)
{
	unsigned int cpu = cpumask_first(lowest_mask);

	while (cpu < nr_cpu_ids) {
		/* unlocked access */
		struct task_struct *task = READ_ONCE(cpu_rq(cpu)->curr);

		if (task_may_not_preempt(task, cpu))
			cpumask_clear_cpu(cpu, lowest_mask);

		/*
		 * Safe even after clearing @cpu above: cpumask_next()
		 * only scans bits strictly after @cpu.
		 */
		cpu = cpumask_next(cpu, lowest_mask);
	}
}

/**
 * cpupri_find - find the best (lowest-pri) CPU in the system
 * @cp: The cpupri context
@@ -61,9 +82,11 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
{
	int idx = 0;
	int task_pri = convert_prio(p->prio);
	bool drop_nopreempts = task_pri <= MAX_RT_PRIO;

	BUG_ON(task_pri >= CPUPRI_NR_PRIORITIES);

retry:
	for (idx = 0; idx < task_pri; idx++) {
		struct cpupri_vec *vec  = &cp->pri_to_cpu[idx];
		int skip = 0;
@@ -99,7 +122,8 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,

		if (lowest_mask) {
			cpumask_and(lowest_mask, p->cpus_ptr, vec->mask);

			if (drop_nopreempts)
				drop_nopreempt_cpus(lowest_mask);
			/*
			 * We have to ensure that we have at least one bit
			 * still set in the array, since the map could have
@@ -114,7 +138,14 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,

		return 1;
	}

	/*
	 * If we can't find any non-preemptible cpu's, retry so we can
	 * find the lowest priority target and avoid priority inversion.
	 */
	if (drop_nopreempts) {
		drop_nopreempts = false;
		goto retry;
	}
	return 0;
}

@@ -235,3 +266,14 @@ void cpupri_cleanup(struct cpupri *cp)
	for (i = 0; i < CPUPRI_NR_PRIORITIES; i++)
		free_cpumask_var(cp->pri_to_cpu[i].mask);
}

/*
 * cpupri_check_rt - check if CPU has a RT task
 * should be called from rcu-sched read section.
 *
 * Returns true when the calling CPU's entry in its root domain's cpupri
 * map is above CPUPRI_NORMAL, i.e. the CPU is currently running at RT
 * priority.
 */
bool cpupri_check_rt(void)
{
	int cpu = raw_smp_processor_id();

	return cpu_rq(cpu)->rd->cpupri.cpu_to_pri[cpu] > CPUPRI_NORMAL;
}
+231 −9
Original line number Diff line number Diff line
@@ -6,6 +6,9 @@
#include "sched.h"

#include "pelt.h"

#include <linux/interrupt.h>

#include "walt.h"

int sched_rr_timeslice = RR_TIMESLICE;
@@ -910,6 +913,66 @@ static inline int rt_se_prio(struct sched_rt_entity *rt_se)
	return rt_task_of(rt_se)->prio;
}

/*
 * dump_throttled_rt_tasks - one-shot diagnostic when RT throttling fires:
 * logs the rq/cpu, the bandwidth period-timer state and the RT tasks
 * queued on @rt_rq (the "potential CPU hogs").
 *
 * Fix vs. original: snprintf() returns the length that WOULD have been
 * written, so once the 500-byte buffer filled up, @pos could move past
 * @end and "end - pos" underflowed to a huge size_t, letting later calls
 * write out of bounds.  scnprintf() returns the bytes actually written
 * (capped at size - 1), so @pos can never pass @end; the first call is
 * switched from sizeof(buf) to "end - pos" for consistency.
 */
static void dump_throttled_rt_tasks(struct rt_rq *rt_rq)
{
	struct rt_prio_array *array = &rt_rq->active;
	struct sched_rt_entity *rt_se;
	char buf[500];
	char *pos = buf;
	char *end = buf + sizeof(buf);
	int idx;
	struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);

	pos += scnprintf(pos, end - pos,
		"sched: RT throttling activated for rt_rq %pK (cpu %d)\n",
		rt_rq, cpu_of(rq_of_rt_rq(rt_rq)));

	pos += scnprintf(pos, end - pos,
			"rt_period_timer: expires=%lld now=%llu period=%llu\n",
			hrtimer_get_expires_ns(&rt_b->rt_period_timer),
			ktime_get_ns(), sched_rt_period(rt_rq));

	if (bitmap_empty(array->bitmap, MAX_RT_PRIO))
		goto out;

	pos += scnprintf(pos, end - pos, "potential CPU hogs:\n");
#ifdef CONFIG_SCHED_INFO
	if (sched_info_on())
		pos += scnprintf(pos, end - pos,
				"current %s (%d) is running for %llu nsec\n",
				current->comm, current->pid,
				rq_clock(rq_of_rt_rq(rt_rq)) -
				current->sched_info.last_arrival);
#endif

	/* Walk every priority level that has queued entities, listing tasks. */
	idx = sched_find_first_bit(array->bitmap);
	while (idx < MAX_RT_PRIO) {
		list_for_each_entry(rt_se, array->queue + idx, run_list) {
			struct task_struct *p;

			/* Group entities have no task to name — skip them. */
			if (!rt_entity_is_task(rt_se))
				continue;

			p = rt_task_of(rt_se);
			if (pos < end)
				pos += scnprintf(pos, end - pos, "\t%s (%d)\n",
					p->comm, p->pid);
		}
		idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx + 1);
	}
out:
#ifdef CONFIG_PANIC_ON_RT_THROTTLING
	/*
	 * Use pr_err() in the BUG() case since printk_sched() will
	 * not get flushed and deadlock is not a concern.
	 */
	pr_err("%s\n", buf);
	BUG();
#else
	printk_deferred("%s\n", buf);
#endif
}

static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
{
	u64 runtime = sched_rt_runtime(rt_rq);
@@ -933,8 +996,14 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
		 * but accrue some time due to boosting.
		 */
		if (likely(rt_b->rt_runtime)) {
			static bool once;

			rt_rq->rt_throttled = 1;
			printk_deferred_once("sched: RT throttling activated\n");

			if (!once) {
				once = true;
				dump_throttled_rt_tasks(rt_rq);
			}
		} else {
			/*
			 * In case we did anyway, make it go away,
@@ -1339,6 +1408,7 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
		rt_se->timeout = 0;

	enqueue_rt_entity(rt_se, flags);
	walt_inc_cumulative_runnable_avg(rq, p);

	if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
		enqueue_pushable_task(rq, p);
@@ -1350,6 +1420,7 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)

	update_curr_rt(rq);
	dequeue_rt_entity(rt_se, flags);
	walt_dec_cumulative_runnable_avg(rq, p);

	dequeue_pushable_task(rq, p);
}
@@ -1391,11 +1462,30 @@ static void yield_task_rt(struct rq *rq)
#ifdef CONFIG_SMP
static int find_lowest_rq(struct task_struct *task);

/*
 * Return whether the task on the given cpu is currently non-preemptible
 * while handling a potentially long softint, or if the task is likely
 * to block preemptions soon because it is a ksoftirq thread that is
 * handling slow softints.
 */
bool
task_may_not_preempt(struct task_struct *task, int cpu)
{
	/* Softirqs being handled on @cpu plus those still pending there. */
	__u32 softirqs = per_cpu(active_softirqs, cpu) |
			 __IRQ_STAT(cpu, __softirq_pending);
	struct task_struct *cpu_ksoftirqd = per_cpu(ksoftirqd, cpu);

	/*
	 * True only when a "long" softirq class (LONG_SOFTIRQ_MASK) is
	 * involved AND @task is either @cpu's ksoftirqd or is itself in
	 * softirq context (SOFTIRQ_MASK bits of its preempt_count set).
	 */
	return ((softirqs & LONG_SOFTIRQ_MASK) &&
		(task == cpu_ksoftirqd ||
		 task_thread_info(task)->preempt_count & SOFTIRQ_MASK));
}

static int
select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
{
	struct task_struct *curr;
	struct rq *rq;
	bool may_not_preempt;

	/* For anything but wake ups, just return the task_cpu */
	if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK)
@@ -1407,7 +1497,17 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
	curr = READ_ONCE(rq->curr); /* unlocked access */

	/*
	 * If the current task on @p's runqueue is a softirq task,
	 * it may run without preemption for a time that is
	 * ill-suited for a waiting RT task. Therefore, try to
	 * wake this RT task on another runqueue.
	 *
	 * Also, if the current task on @p's runqueue is an RT task, then
	 * try to see if we can wake this RT task up on another
	 * runqueue. Otherwise simply start this RT task
	 * on its current runqueue.
@@ -1428,17 +1528,22 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
	 * This test is optimistic, if we get it wrong the load-balancer
	 * will have to sort it out.
	 */
	if (curr && unlikely(rt_task(curr)) &&
	may_not_preempt = task_may_not_preempt(curr, cpu);
	if (sched_energy_enabled() || may_not_preempt ||
	    (unlikely(rt_task(curr)) &&
	     (curr->nr_cpus_allowed < 2 ||
	     curr->prio <= p->prio)) {
	      curr->prio <= p->prio))) {
		int target = find_lowest_rq(p);

		/*
		 * Don't bother moving it if the destination CPU is
		 * not running a lower priority task.
		 * If cpu is non-preemptible, prefer remote cpu
		 * even if it's running a higher-prio task.
		 * Otherwise: Don't bother moving it if the
		 * destination CPU is not running a lower priority task.
		 */
		if (target != -1 &&
		    p->prio < cpu_rq(target)->rt.highest_prio.curr)
		    (may_not_preempt ||
		     p->prio < cpu_rq(target)->rt.highest_prio.curr))
			cpu = target;
	}
	rcu_read_unlock();
@@ -1634,12 +1739,119 @@ static struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu)

static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);

#ifdef CONFIG_SCHED_WALT
/*
 * rt_energy_aware_wake_cpu - pick an energy-efficient CPU for an RT task.
 *
 * Walks the sched groups of the asymmetric-capacity domain rooted at the
 * root domain's minimum-capacity CPU and, among the CPUs left in this
 * CPU's local lowest_mask, picks the least-utilized candidate; ties are
 * broken by shallower idle state, then lower cumulative window demand,
 * with a bias toward the task's previous CPU.
 *
 * NOTE(review): assumes the caller already populated the per-cpu
 * local_cpu_mask (find_lowest_rq() runs cpupri_find() first) — confirm.
 *
 * Returns the chosen CPU, or -1 if no suitable CPU was found.
 */
static int rt_energy_aware_wake_cpu(struct task_struct *task)
{
	struct sched_domain *sd;
	struct sched_group *sg;
	struct cpumask *lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask);
	int cpu, best_cpu = -1;
	unsigned long best_capacity = ULONG_MAX;
	unsigned long util, best_cpu_util = ULONG_MAX;
	unsigned long best_cpu_util_cum = ULONG_MAX;
	unsigned long util_cum;
	unsigned long tutil = task_util(task);
	int best_cpu_idle_idx = INT_MAX;
	int cpu_idle_idx = -1;
	bool boost_on_big = rt_boost_on_big();

	rcu_read_lock();

	/* Anchor the domain walk at the root domain's smallest-capacity CPU. */
	cpu = cpu_rq(smp_processor_id())->rd->min_cap_orig_cpu;
	if (cpu < 0)
		goto unlock;

	sd = rcu_dereference(*per_cpu_ptr(&sd_asym_cpucapacity, cpu));
	if (!sd)
		goto unlock;

retry:
	sg = sd->groups;
	do {
		int fcpu = group_first_cpu(sg);
		int capacity_orig = capacity_orig_of(fcpu);

		/*
		 * NB: "continue" jumps to the while-condition below, whose
		 * comma expression advances @sg first — no infinite loop.
		 */
		if (boost_on_big) {
			/* Boosted: only consider non-minimum-capacity groups. */
			if (is_min_capacity_cpu(fcpu))
				continue;
		} else {
			/* Only consider groups no bigger than the best so far. */
			if (capacity_orig > best_capacity)
				continue;
		}

		for_each_cpu_and(cpu, lowest_mask, sched_group_span(sg)) {
			if (cpu_isolated(cpu))
				continue;

			if (sched_cpu_high_irqload(cpu))
				continue;

			util = cpu_util(cpu);

			/* Skip CPUs that the task's utilization would overload. */
			if (__cpu_overutilized(cpu, util + tutil))
				continue;

			/* Find the least loaded CPU */
			if (util > best_cpu_util)
				continue;

			/*
			 * If the previous CPU has same load, keep it as
			 * best_cpu.
			 */
			if (best_cpu_util == util && best_cpu == task_cpu(task))
				continue;

			/*
			 * If candidate CPU is the previous CPU, select it.
			 * Otherwise, if its load is same with best_cpu and in
			 * a shallower C-state, select it.  If all above
			 * conditions are same, select the least cumulative
			 * window demand CPU.
			 */
			cpu_idle_idx = idle_get_state_idx(cpu_rq(cpu));

			util_cum = cpu_util_cum(cpu, 0);
			if (cpu != task_cpu(task) && best_cpu_util == util) {
				if (best_cpu_idle_idx < cpu_idle_idx)
					continue;

				if (best_cpu_idle_idx == cpu_idle_idx &&
						best_cpu_util_cum < util_cum)
					continue;
			}

			best_cpu_idle_idx = cpu_idle_idx;
			best_cpu_util_cum = util_cum;
			best_cpu_util = util;
			best_cpu = cpu;
			best_capacity = capacity_orig;
		}

	} while (sg = sg->next, sg != sd->groups);

	/* Boost found nothing on big CPUs: retry without the boost filter. */
	if (unlikely(boost_on_big) && best_cpu == -1) {
		boost_on_big = false;
		goto retry;
	}

unlock:
	rcu_read_unlock();
	return best_cpu;
}
#else
/*
 * !CONFIG_SCHED_WALT stub: no energy-aware placement; returning -1 makes
 * find_lowest_rq() fall back to task_cpu(task).
 */
static inline int rt_energy_aware_wake_cpu(struct task_struct *task)
{
	return -1;
}
#endif

static int find_lowest_rq(struct task_struct *task)
{
	struct sched_domain *sd;
	struct cpumask *lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask);
	int this_cpu = smp_processor_id();
	int cpu      = task_cpu(task);
	int cpu = -1;

	/* Make sure the mask is initialized first */
	if (unlikely(!lowest_mask))
@@ -1651,6 +1863,12 @@ static int find_lowest_rq(struct task_struct *task)
	if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask))
		return -1; /* No targets found */

	if (sched_energy_enabled())
		cpu = rt_energy_aware_wake_cpu(task);

	if (cpu == -1)
		cpu = task_cpu(task);

	/*
	 * At this point we have built a mask of CPUs representing the
	 * lowest priority tasks in the system.  Now we want to elect
@@ -1857,7 +2075,9 @@ static int push_rt_task(struct rq *rq)
	}

	deactivate_task(rq, next_task, 0);
	next_task->on_rq = TASK_ON_RQ_MIGRATING;
	set_task_cpu(next_task, lowest_rq->cpu);
	next_task->on_rq = TASK_ON_RQ_QUEUED;
	activate_task(lowest_rq, next_task, 0);
	ret = 1;

@@ -2129,7 +2349,9 @@ static void pull_rt_task(struct rq *this_rq)
			resched = true;

			deactivate_task(src_rq, p, 0);
			p->on_rq = TASK_ON_RQ_MIGRATING;
			set_task_cpu(p, this_cpu);
			p->on_rq = TASK_ON_RQ_QUEUED;
			activate_task(this_rq, p, 0);
			/*
			 * We continue with the search, just in
+5 −0
Original line number Diff line number Diff line
@@ -2460,6 +2460,11 @@ extern void set_rq_online (struct rq *rq);
extern void set_rq_offline(struct rq *rq);
extern bool sched_smp_initialized;

/*
 * task_may_not_preempt - check whether a task may not be preemptible soon
 */
extern bool task_may_not_preempt(struct task_struct *task, int cpu);

#else /* CONFIG_SMP */

/*
Loading