Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit fac6230f authored by qctecmdr's avatar qctecmdr Committed by Gerrit - the friendly Code Review server
Browse files

Merge "softirq: defer softirq processing to ksoftirqd if CPU is busy with RT"

parents 9dd14571 db6940d5
Loading
Loading
Loading
Loading
+7 −0
Original line number Diff line number Diff line
@@ -528,6 +528,12 @@ enum
};

#define SOFTIRQ_STOP_IDLE_MASK (~(1 << RCU_SOFTIRQ))
/* Softirq's where the handling might be long: */
#define LONG_SOFTIRQ_MASK ((1 << NET_TX_SOFTIRQ)       | \
			   (1 << NET_RX_SOFTIRQ)       | \
			   (1 << BLOCK_SOFTIRQ)        | \
			   (1 << IRQ_POLL_SOFTIRQ)     | \
			   (1 << TASKLET_SOFTIRQ))

/* map softirq index to softirq name. update 'softirq_to_name' in
 * kernel/softirq.c when adding a new softirq.
@@ -563,6 +569,7 @@ extern void raise_softirq_irqoff(unsigned int nr);
extern void raise_softirq(unsigned int nr);

DECLARE_PER_CPU(struct task_struct *, ksoftirqd);
DECLARE_PER_CPU(__u32, active_softirqs);

static inline struct task_struct *this_cpu_ksoftirqd(void)
{
+5 −0
Original line number Diff line number Diff line
@@ -1721,6 +1721,7 @@ extern int task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_
#ifdef CONFIG_SMP
extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask);
extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask);
extern bool cpupri_check_rt(void);
#else
static inline void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
{
@@ -1731,6 +1732,10 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p, const struct cpuma
		return -EINVAL;
	return 0;
}
/*
 * !CONFIG_SMP stub: there is no cpupri tracking without SMP, so report
 * that the (only) CPU is not busy with an RT task.
 */
static inline bool cpupri_check_rt(void)
{
	return false;
}
#endif

extern int yield_to(struct task_struct *p, bool preempt);
+44 −2
Original line number Diff line number Diff line
@@ -41,6 +41,27 @@ static int convert_prio(int prio)
	return cpupri;
}

/**
 * drop_nopreempt_cpus - remove a cpu from the mask if it is likely
 *			 non-preemptible
 * @lowest_mask: mask with selected CPUs (non-NULL)
 *
 * Clears from @lowest_mask every CPU whose current task is flagged by
 * task_may_not_preempt() (busy with, or about to handle, long softirq
 * work), so RT placement can steer clear of those CPUs.
 */
static void
drop_nopreempt_cpus(struct cpumask *lowest_mask)
{
	unsigned int cpu = cpumask_first(lowest_mask);

	while (cpu < nr_cpu_ids) {
		/* unlocked access */
		struct task_struct *task = READ_ONCE(cpu_rq(cpu)->curr);

		if (task_may_not_preempt(task, cpu))
			cpumask_clear_cpu(cpu, lowest_mask);

		/*
		 * Safe even after clearing @cpu above: cpumask_next()
		 * only scans bits strictly after @cpu.
		 */
		cpu = cpumask_next(cpu, lowest_mask);
	}
}

/**
 * cpupri_find - find the best (lowest-pri) CPU in the system
 * @cp: The cpupri context
@@ -61,9 +82,11 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
{
	int idx = 0;
	int task_pri = convert_prio(p->prio);
	bool drop_nopreempts = task_pri <= MAX_RT_PRIO;

	BUG_ON(task_pri >= CPUPRI_NR_PRIORITIES);

retry:
	for (idx = 0; idx < task_pri; idx++) {
		struct cpupri_vec *vec  = &cp->pri_to_cpu[idx];
		int skip = 0;
@@ -99,7 +122,8 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,

		if (lowest_mask) {
			cpumask_and(lowest_mask, p->cpus_ptr, vec->mask);

			if (drop_nopreempts)
				drop_nopreempt_cpus(lowest_mask);
			/*
			 * We have to ensure that we have at least one bit
			 * still set in the array, since the map could have
@@ -114,7 +138,14 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,

		return 1;
	}

	/*
	 * If we can't find any non-preemptible cpu's, retry so we can
	 * find the lowest priority target and avoid priority inversion.
	 */
	if (drop_nopreempts) {
		drop_nopreempts = false;
		goto retry;
	}
	return 0;
}

@@ -235,3 +266,14 @@ void cpupri_cleanup(struct cpupri *cp)
	for (i = 0; i < CPUPRI_NR_PRIORITIES; i++)
		free_cpumask_var(cp->pri_to_cpu[i].mask);
}

/*
 * cpupri_check_rt - check if CPU has a RT task
 * should be called from rcu-sched read section.
 *
 * Returns true when the calling CPU's entry in its root domain's cpupri
 * map is above CPUPRI_NORMAL, i.e. the CPU is currently running at RT
 * priority.
 */
bool cpupri_check_rt(void)
{
	int cpu = raw_smp_processor_id();

	return cpu_rq(cpu)->rd->cpupri.cpu_to_pri[cpu] > CPUPRI_NORMAL;
}
+231 −9
Original line number Diff line number Diff line
@@ -6,6 +6,9 @@
#include "sched.h"

#include "pelt.h"

#include <linux/interrupt.h>

#include "walt.h"

int sched_rr_timeslice = RR_TIMESLICE;
@@ -910,6 +913,66 @@ static inline int rt_se_prio(struct sched_rt_entity *rt_se)
	return rt_task_of(rt_se)->prio;
}

/*
 * dump_throttled_rt_tasks - one-shot diagnostic when RT throttling fires:
 * logs the rq/cpu, the bandwidth period-timer state and the RT tasks
 * queued on @rt_rq (the "potential CPU hogs").
 *
 * Fix vs. original: snprintf() returns the length that WOULD have been
 * written, so once the 500-byte buffer filled up, @pos could move past
 * @end and "end - pos" underflowed to a huge size_t, letting later calls
 * write out of bounds.  scnprintf() returns the bytes actually written
 * (capped at size - 1), so @pos can never pass @end; the first call is
 * switched from sizeof(buf) to "end - pos" for consistency.
 */
static void dump_throttled_rt_tasks(struct rt_rq *rt_rq)
{
	struct rt_prio_array *array = &rt_rq->active;
	struct sched_rt_entity *rt_se;
	char buf[500];
	char *pos = buf;
	char *end = buf + sizeof(buf);
	int idx;
	struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);

	pos += scnprintf(pos, end - pos,
		"sched: RT throttling activated for rt_rq %pK (cpu %d)\n",
		rt_rq, cpu_of(rq_of_rt_rq(rt_rq)));

	pos += scnprintf(pos, end - pos,
			"rt_period_timer: expires=%lld now=%llu period=%llu\n",
			hrtimer_get_expires_ns(&rt_b->rt_period_timer),
			ktime_get_ns(), sched_rt_period(rt_rq));

	if (bitmap_empty(array->bitmap, MAX_RT_PRIO))
		goto out;

	pos += scnprintf(pos, end - pos, "potential CPU hogs:\n");
#ifdef CONFIG_SCHED_INFO
	if (sched_info_on())
		pos += scnprintf(pos, end - pos,
				"current %s (%d) is running for %llu nsec\n",
				current->comm, current->pid,
				rq_clock(rq_of_rt_rq(rt_rq)) -
				current->sched_info.last_arrival);
#endif

	/* Walk every priority level that has queued entities, listing tasks. */
	idx = sched_find_first_bit(array->bitmap);
	while (idx < MAX_RT_PRIO) {
		list_for_each_entry(rt_se, array->queue + idx, run_list) {
			struct task_struct *p;

			/* Group entities have no task to name — skip them. */
			if (!rt_entity_is_task(rt_se))
				continue;

			p = rt_task_of(rt_se);
			if (pos < end)
				pos += scnprintf(pos, end - pos, "\t%s (%d)\n",
					p->comm, p->pid);
		}
		idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx + 1);
	}
out:
#ifdef CONFIG_PANIC_ON_RT_THROTTLING
	/*
	 * Use pr_err() in the BUG() case since printk_sched() will
	 * not get flushed and deadlock is not a concern.
	 */
	pr_err("%s\n", buf);
	BUG();
#else
	printk_deferred("%s\n", buf);
#endif
}

static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
{
	u64 runtime = sched_rt_runtime(rt_rq);
@@ -933,8 +996,14 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
		 * but accrue some time due to boosting.
		 */
		if (likely(rt_b->rt_runtime)) {
			static bool once;

			rt_rq->rt_throttled = 1;
			printk_deferred_once("sched: RT throttling activated\n");

			if (!once) {
				once = true;
				dump_throttled_rt_tasks(rt_rq);
			}
		} else {
			/*
			 * In case we did anyway, make it go away,
@@ -1339,6 +1408,7 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
		rt_se->timeout = 0;

	enqueue_rt_entity(rt_se, flags);
	walt_inc_cumulative_runnable_avg(rq, p);

	if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
		enqueue_pushable_task(rq, p);
@@ -1350,6 +1420,7 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)

	update_curr_rt(rq);
	dequeue_rt_entity(rt_se, flags);
	walt_dec_cumulative_runnable_avg(rq, p);

	dequeue_pushable_task(rq, p);
}
@@ -1391,11 +1462,30 @@ static void yield_task_rt(struct rq *rq)
#ifdef CONFIG_SMP
static int find_lowest_rq(struct task_struct *task);

/*
 * Return whether the task on the given cpu is currently non-preemptible
 * while handling a potentially long softint, or if the task is likely
 * to block preemptions soon because it is a ksoftirq thread that is
 * handling slow softints.
 */
bool
task_may_not_preempt(struct task_struct *task, int cpu)
{
	/* Softirqs being handled on @cpu plus those still pending there. */
	__u32 softirqs = per_cpu(active_softirqs, cpu) |
			 __IRQ_STAT(cpu, __softirq_pending);
	struct task_struct *cpu_ksoftirqd = per_cpu(ksoftirqd, cpu);

	/*
	 * True only when a "long" softirq class (LONG_SOFTIRQ_MASK) is
	 * involved AND @task is either @cpu's ksoftirqd or is itself in
	 * softirq context (SOFTIRQ_MASK bits of its preempt_count set).
	 */
	return ((softirqs & LONG_SOFTIRQ_MASK) &&
		(task == cpu_ksoftirqd ||
		 task_thread_info(task)->preempt_count & SOFTIRQ_MASK));
}

static int
select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
{
	struct task_struct *curr;
	struct rq *rq;
	bool may_not_preempt;

	/* For anything but wake ups, just return the task_cpu */
	if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK)
@@ -1407,7 +1497,17 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
	curr = READ_ONCE(rq->curr); /* unlocked access */

	/*
	 * If the current task on @p's runqueue is a softirq task,
	 * it may run without preemption for a time that is
	 * ill-suited for a waiting RT task. Therefore, try to
	 * wake this RT task on another runqueue.
	 *
	 * Also, if the current task on @p's runqueue is an RT task, then
	 * try to see if we can wake this RT task up on another
	 * runqueue. Otherwise simply start this RT task
	 * on its current runqueue.
@@ -1428,17 +1528,22 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
	 * This test is optimistic, if we get it wrong the load-balancer
	 * will have to sort it out.
	 */
	if (curr && unlikely(rt_task(curr)) &&
	may_not_preempt = task_may_not_preempt(curr, cpu);
	if (sched_energy_enabled() || may_not_preempt ||
	    (unlikely(rt_task(curr)) &&
	     (curr->nr_cpus_allowed < 2 ||
	     curr->prio <= p->prio)) {
	      curr->prio <= p->prio))) {
		int target = find_lowest_rq(p);

		/*
		 * Don't bother moving it if the destination CPU is
		 * not running a lower priority task.
		 * If cpu is non-preemptible, prefer remote cpu
		 * even if it's running a higher-prio task.
		 * Otherwise: Don't bother moving it if the
		 * destination CPU is not running a lower priority task.
		 */
		if (target != -1 &&
		    p->prio < cpu_rq(target)->rt.highest_prio.curr)
		    (may_not_preempt ||
		     p->prio < cpu_rq(target)->rt.highest_prio.curr))
			cpu = target;
	}
	rcu_read_unlock();
@@ -1634,12 +1739,119 @@ static struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu)

static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);

#ifdef CONFIG_SCHED_WALT
/*
 * rt_energy_aware_wake_cpu - pick an energy-efficient CPU for an RT task.
 *
 * Walks the sched groups of the asymmetric-capacity domain rooted at the
 * root domain's minimum-capacity CPU and, among the CPUs left in this
 * CPU's local lowest_mask, picks the least-utilized candidate; ties are
 * broken by shallower idle state, then lower cumulative window demand,
 * with a bias toward the task's previous CPU.
 *
 * NOTE(review): assumes the caller already populated the per-cpu
 * local_cpu_mask (find_lowest_rq() runs cpupri_find() first) — confirm.
 *
 * Returns the chosen CPU, or -1 if no suitable CPU was found.
 */
static int rt_energy_aware_wake_cpu(struct task_struct *task)
{
	struct sched_domain *sd;
	struct sched_group *sg;
	struct cpumask *lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask);
	int cpu, best_cpu = -1;
	unsigned long best_capacity = ULONG_MAX;
	unsigned long util, best_cpu_util = ULONG_MAX;
	unsigned long best_cpu_util_cum = ULONG_MAX;
	unsigned long util_cum;
	unsigned long tutil = task_util(task);
	int best_cpu_idle_idx = INT_MAX;
	int cpu_idle_idx = -1;
	bool boost_on_big = rt_boost_on_big();

	rcu_read_lock();

	/* Anchor the domain walk at the root domain's smallest-capacity CPU. */
	cpu = cpu_rq(smp_processor_id())->rd->min_cap_orig_cpu;
	if (cpu < 0)
		goto unlock;

	sd = rcu_dereference(*per_cpu_ptr(&sd_asym_cpucapacity, cpu));
	if (!sd)
		goto unlock;

retry:
	sg = sd->groups;
	do {
		int fcpu = group_first_cpu(sg);
		int capacity_orig = capacity_orig_of(fcpu);

		/*
		 * NB: "continue" jumps to the while-condition below, whose
		 * comma expression advances @sg first — no infinite loop.
		 */
		if (boost_on_big) {
			/* Boosted: only consider non-minimum-capacity groups. */
			if (is_min_capacity_cpu(fcpu))
				continue;
		} else {
			/* Only consider groups no bigger than the best so far. */
			if (capacity_orig > best_capacity)
				continue;
		}

		for_each_cpu_and(cpu, lowest_mask, sched_group_span(sg)) {
			if (cpu_isolated(cpu))
				continue;

			if (sched_cpu_high_irqload(cpu))
				continue;

			util = cpu_util(cpu);

			/* Skip CPUs that the task's utilization would overload. */
			if (__cpu_overutilized(cpu, util + tutil))
				continue;

			/* Find the least loaded CPU */
			if (util > best_cpu_util)
				continue;

			/*
			 * If the previous CPU has same load, keep it as
			 * best_cpu.
			 */
			if (best_cpu_util == util && best_cpu == task_cpu(task))
				continue;

			/*
			 * If candidate CPU is the previous CPU, select it.
			 * Otherwise, if its load is same with best_cpu and in
			 * a shallower C-state, select it.  If all above
			 * conditions are same, select the least cumulative
			 * window demand CPU.
			 */
			cpu_idle_idx = idle_get_state_idx(cpu_rq(cpu));

			util_cum = cpu_util_cum(cpu, 0);
			if (cpu != task_cpu(task) && best_cpu_util == util) {
				if (best_cpu_idle_idx < cpu_idle_idx)
					continue;

				if (best_cpu_idle_idx == cpu_idle_idx &&
						best_cpu_util_cum < util_cum)
					continue;
			}

			best_cpu_idle_idx = cpu_idle_idx;
			best_cpu_util_cum = util_cum;
			best_cpu_util = util;
			best_cpu = cpu;
			best_capacity = capacity_orig;
		}

	} while (sg = sg->next, sg != sd->groups);

	/* Boost found nothing on big CPUs: retry without the boost filter. */
	if (unlikely(boost_on_big) && best_cpu == -1) {
		boost_on_big = false;
		goto retry;
	}

unlock:
	rcu_read_unlock();
	return best_cpu;
}
#else
/*
 * !CONFIG_SCHED_WALT stub: no energy-aware placement; returning -1 makes
 * find_lowest_rq() fall back to task_cpu(task).
 */
static inline int rt_energy_aware_wake_cpu(struct task_struct *task)
{
	return -1;
}
#endif

static int find_lowest_rq(struct task_struct *task)
{
	struct sched_domain *sd;
	struct cpumask *lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask);
	int this_cpu = smp_processor_id();
	int cpu      = task_cpu(task);
	int cpu = -1;

	/* Make sure the mask is initialized first */
	if (unlikely(!lowest_mask))
@@ -1651,6 +1863,12 @@ static int find_lowest_rq(struct task_struct *task)
	if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask))
		return -1; /* No targets found */

	if (sched_energy_enabled())
		cpu = rt_energy_aware_wake_cpu(task);

	if (cpu == -1)
		cpu = task_cpu(task);

	/*
	 * At this point we have built a mask of CPUs representing the
	 * lowest priority tasks in the system.  Now we want to elect
@@ -1857,7 +2075,9 @@ static int push_rt_task(struct rq *rq)
	}

	deactivate_task(rq, next_task, 0);
	next_task->on_rq = TASK_ON_RQ_MIGRATING;
	set_task_cpu(next_task, lowest_rq->cpu);
	next_task->on_rq = TASK_ON_RQ_QUEUED;
	activate_task(lowest_rq, next_task, 0);
	ret = 1;

@@ -2129,7 +2349,9 @@ static void pull_rt_task(struct rq *this_rq)
			resched = true;

			deactivate_task(src_rq, p, 0);
			p->on_rq = TASK_ON_RQ_MIGRATING;
			set_task_cpu(p, this_cpu);
			p->on_rq = TASK_ON_RQ_QUEUED;
			activate_task(this_rq, p, 0);
			/*
			 * We continue with the search, just in
+5 −0
Original line number Diff line number Diff line
@@ -2460,6 +2460,11 @@ extern void set_rq_online (struct rq *rq);
extern void set_rq_offline(struct rq *rq);
extern bool sched_smp_initialized;

/*
 * task_may_not_preempt - check whether a task may not be preemptible soon
 */
extern bool task_may_not_preempt(struct task_struct *task, int cpu);

#else /* CONFIG_SMP */

/*
Loading