Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 34e4e6c5 authored by Syed Rameez Mustafa
Browse files

Merge remote-tracking branch '318/dev/msm-3.18-sched' into msm-3.18



* 318/dev/msm-3.18-sched:
  sched: restrict sync wakee placement bias with waker's demand
  sched: add preference for waker cluster CPU in wakee task placement
  sched/core: Add protection against null-pointer dereference
  sched: allow select_prev_cpu_us to be set to values greater than 100us

Change-Id: I02831c21998a5e58e33d76e0496ea564ae4f0428
Signed-off-by: Syed Rameez Mustafa <rameezmustafa@codeaurora.org>
parents 2ab415c7 bb11725f
Loading
Loading
Loading
Loading
+23 −0
Original line number Diff line number Diff line
@@ -1280,6 +1280,29 @@ power-efficient cpu. We don't let it come back to a power-efficient cpu until
its demand *in reference to the power-efficient cpu* drops less than 60%
(sched_downmigrate).


*** 7.26 sched_small_wakee_task_load

Appears at: /proc/sys/kernel/sched_small_wakee_task_load

Default value: 10

This tunable is a percentage.  It configures the maximum demand of a small wakee task.
Sync wakee tasks which have demand less than sched_small_wakee_task_load are
categorized as small wakee tasks.  Scheduler places small wakee tasks on the
waker's cluster.


*** 7.27 sched_big_waker_task_load

Appears at: /proc/sys/kernel/sched_big_waker_task_load

Default value: 25

This tunable is a percentage.  It configures the minimum demand of a big sync waker
task.  Scheduler places small wakee tasks woken up by big sync waker on the
waker's cluster.

=========================
8. HMP SCHEDULER TRACE POINTS
=========================
+2 −0
Original line number Diff line number Diff line
@@ -67,6 +67,8 @@ extern unsigned int sysctl_sched_downmigrate_pct;
extern int sysctl_sched_upmigrate_min_nice;
extern unsigned int sysctl_sched_boost;
extern unsigned int sysctl_early_detection_duration;
extern unsigned int sysctl_sched_small_wakee_task_load_pct;
extern unsigned int sysctl_sched_big_waker_task_load_pct;

#ifdef CONFIG_SCHED_QHMP
extern unsigned int sysctl_sched_min_runtime;
+14 −9
Original line number Diff line number Diff line
@@ -3373,7 +3373,7 @@ static void remove_task_from_group(struct task_struct *p)

	rq = __task_rq_lock(p);
	list_del_init(&p->grp_list);
	p->grp = NULL;
	rcu_assign_pointer(p->grp, NULL);
	__task_rq_unlock(rq);

	if (!list_empty(&grp->tasks)) {
@@ -3403,7 +3403,7 @@ add_task_to_group(struct task_struct *p, struct related_thread_group *grp)
	 * reference of p->grp in various hot-paths
	 */
	rq = __task_rq_lock(p);
	p->grp = grp;
	rcu_assign_pointer(p->grp, grp);
	list_add(&p->grp_list, &grp->tasks);
	__task_rq_unlock(rq);

@@ -3472,12 +3472,13 @@ done:

unsigned int sched_get_group_id(struct task_struct *p)
{
	unsigned long flags;
	unsigned int group_id;
	struct related_thread_group *grp;

	raw_spin_lock_irqsave(&p->pi_lock, flags);
	group_id = p->grp ? p->grp->id : 0;
	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
	rcu_read_lock();
	grp = task_related_thread_group(p);
	group_id = grp ? grp->id : 0;
	rcu_read_unlock();

	return group_id;
}
@@ -3667,7 +3668,7 @@ static inline int update_preferred_cluster(struct related_thread_group *grp,
	 * has passed since we last updated preference
	 */
	if (abs(new_load - old_load) > sched_ravg_window / 4 ||
		sched_ktime_clock() - p->grp->last_update > sched_ravg_window)
		sched_ktime_clock() - grp->last_update > sched_ravg_window)
		return 1;

	return 0;
@@ -4401,15 +4402,17 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)

	raw_spin_lock(&rq->lock);
	old_load = task_load(p);
	grp = task_related_thread_group(p);
	wallclock = sched_ktime_clock();
	update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
	heavy_task = heavy_task_wakeup(p, rq, TASK_WAKE);
	update_task_ravg(p, rq, TASK_WAKE, wallclock, 0);
	raw_spin_unlock(&rq->lock);

	rcu_read_lock();
	grp = task_related_thread_group(p);
	if (update_preferred_cluster(grp, p, old_load))
		set_preferred_cluster(grp);
	rcu_read_unlock();

	p->sched_contributes_to_load = !!task_contributes_to_load(p);
	p->state = TASK_WAKING;
@@ -5358,7 +5361,6 @@ void scheduler_tick(void)

	raw_spin_lock(&rq->lock);
	old_load = task_load(curr);
	grp = task_related_thread_group(curr);
	set_window_start(rq);
	update_rq_clock(rq);
	curr->sched_class->task_tick(rq, curr, 0);
@@ -5380,8 +5382,11 @@ void scheduler_tick(void)
#endif
	rq_last_tick_reset(rq);

	rcu_read_lock();
	grp = task_related_thread_group(curr);
	if (update_preferred_cluster(grp, curr, old_load))
		set_preferred_cluster(grp);
	rcu_read_unlock();

	if (curr->sched_class == &fair_sched_class)
		check_for_migration(rq, curr);
+83 −20
Original line number Diff line number Diff line
@@ -2469,6 +2469,16 @@ unsigned int __read_mostly sysctl_sched_spill_nr_run = 10;
 */
unsigned int __read_mostly sysctl_sched_enable_power_aware = 0;

/*
 * Place sync wakee tasks that have less than the configured demand on the
 * waker's cluster.
 */
unsigned int __read_mostly sched_small_wakee_task_load;
unsigned int __read_mostly sysctl_sched_small_wakee_task_load_pct = 10;

unsigned int __read_mostly sched_big_waker_task_load;
unsigned int __read_mostly sysctl_sched_big_waker_task_load_pct = 25;

/*
 * CPUs with load greater than the sched_spill_load_threshold are not
 * eligible for task placement. When all CPUs in a cluster achieve a
@@ -2595,6 +2605,14 @@ void set_hmp_defaults(void)

	sched_short_sleep_task_threshold = sysctl_sched_select_prev_cpu_us *
					   NSEC_PER_USEC;

	sched_small_wakee_task_load =
		div64_u64((u64)sysctl_sched_small_wakee_task_load_pct *
			  (u64)sched_ravg_window, 100);

	sched_big_waker_task_load =
		div64_u64((u64)sysctl_sched_big_waker_task_load_pct *
			  (u64)sched_ravg_window, 100);
}

u32 sched_get_init_task_load(struct task_struct *p)
@@ -2871,6 +2889,7 @@ struct cpu_select_env {
	struct related_thread_group *rtg;
	u8 reason;
	u8 need_idle:1;
	u8 need_waker_cluster:1;
	u8 boost:1;
	u8 sync:1;
	u8 ignore_prev_cpu:1;
@@ -2910,7 +2929,7 @@ preferred_cluster(struct sched_cluster *cluster, struct task_struct *p)

	rcu_read_lock();

	grp = p->grp;
	grp = task_related_thread_group(p);
	if (!grp || !sysctl_sched_enable_colocation)
		rc = 1;
	else
@@ -3036,15 +3055,29 @@ next_candidate(const unsigned long *list, int start, int end)
	return sched_cluster[cluster_id];
}

static void update_spare_capacity(
struct cluster_cpu_stats *stats, int cpu, int capacity, u64 cpu_load)
static void
update_spare_capacity(struct cluster_cpu_stats *stats,
		      struct cpu_select_env *env, int cpu, int capacity,
		      u64 cpu_load)
{
	s64 spare_capacity = sched_ravg_window - cpu_load;

	if (spare_capacity > 0 &&
	    (spare_capacity > stats->highest_spare_capacity ||
	     (spare_capacity == stats->highest_spare_capacity &&
	     capacity > cpu_capacity(stats->best_capacity_cpu)))) {
	      ((!env->need_waker_cluster &&
		capacity > cpu_capacity(stats->best_capacity_cpu)) ||
	       (env->need_waker_cluster &&
		cpu_rq(cpu)->nr_running <
		cpu_rq(stats->best_capacity_cpu)->nr_running))))) {
		/*
		 * If sync waker is the only runnable of CPU, cr_avg of the
		 * CPU is 0 so we have high chance to place the wakee on the
	 * waker's CPU which likely causes preemption of the waker.
	 * This can lead to migration of the preempted waker.  Place the
		 * wakee on the real idle CPU when it's possible by checking
		 * nr_running to avoid such preemption.
		 */
		stats->highest_spare_capacity = spare_capacity;
		stats->best_capacity_cpu = cpu;
	}
@@ -3064,7 +3097,7 @@ struct cpu_select_env *env, struct cluster_cpu_stats *stats)
			sched_irqload(i), power_cost(i, task_load(env->p) +
					cpu_cravg_sync(i, env->sync)), 0);

			update_spare_capacity(stats, i, next->capacity,
			update_spare_capacity(stats, env, i, next->capacity,
					  cpu_load_sync(i, env->sync));
		}
	}
@@ -3162,9 +3195,11 @@ static void find_best_cpu_in_cluster(struct sched_cluster *c,
		if (unlikely(!cpu_active(i)) || skip_cpu(i, env))
			continue;

		update_spare_capacity(stats, i, c->capacity, env->cpu_load);
		update_spare_capacity(stats, env, i, c->capacity,
				      env->cpu_load);

		if (env->boost || sched_cpu_high_irqload(i) ||
		if (env->boost || env->need_waker_cluster ||
		    sched_cpu_high_irqload(i) ||
		    spill_threshold_crossed(env, cpu_rq(i)))
			continue;

@@ -3235,7 +3270,7 @@ bias_to_prev_cpu(struct cpu_select_env *env, struct cluster_cpu_stats *stats)
	env->cpu_load = cpu_load_sync(prev_cpu, env->sync);
	if (sched_cpu_high_irqload(prev_cpu) ||
			spill_threshold_crossed(env, cpu_rq(prev_cpu))) {
		update_spare_capacity(stats, prev_cpu,
		update_spare_capacity(stats, env, prev_cpu,
				cluster->capacity, env->cpu_load);
		env->ignore_prev_cpu = 1;
		return false;
@@ -3244,6 +3279,14 @@ bias_to_prev_cpu(struct cpu_select_env *env, struct cluster_cpu_stats *stats)
	return true;
}

/*
 * Decide whether a sync wakeup should bias the wakee toward the waker's
 * cluster.  True only when this is a plain sync wakeup (no need_idle
 * request and no special placement reason), the waker ("current") is a
 * big task (demand above sched_big_waker_task_load) and the wakee is a
 * small task (demand below sched_small_wakee_task_load).  Both
 * thresholds are derived from the sched_big_waker_task_load_pct /
 * sched_small_wakee_task_load_pct sysctl tunables in set_hmp_defaults().
 */
static inline bool
wake_to_waker_cluster(struct cpu_select_env *env)
{
	return !env->need_idle && !env->reason && env->sync &&
	       task_load(current) > sched_big_waker_task_load &&
	       task_load(env->p) < sched_small_wakee_task_load;
}

static inline int
cluster_allowed(struct task_struct *p, struct sched_cluster *cluster)
{
@@ -3269,6 +3312,7 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
		.p			= p,
		.reason			= reason,
		.need_idle		= wake_to_idle(p),
		.need_waker_cluster	= 0,
		.boost			= sched_boost(),
		.sync			= sync,
		.prev_cpu		= target,
@@ -3283,7 +3327,7 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,

	rcu_read_lock();

	grp = p->grp;
	grp = task_related_thread_group(p);

	if (grp && grp->preferred_cluster) {
		pref_cluster = grp->preferred_cluster;
@@ -3291,10 +3335,18 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
			clear_bit(pref_cluster->id, env.candidate_list);
		else
			env.rtg = grp;
	} else {
		cluster = cpu_rq(smp_processor_id())->cluster;
		if (wake_to_waker_cluster(&env) &&
		    cluster_allowed(p, cluster)) {
			env.need_waker_cluster = 1;
			bitmap_zero(env.candidate_list, NR_CPUS);
			__set_bit(cluster->id, env.candidate_list);
		} else if (bias_to_prev_cpu(&env, &stats)) {
			fast_path = true;
			goto out;
		}
	}

retry:
	cluster = select_least_power_cluster(&env);
@@ -3763,8 +3815,11 @@ int sched_hmp_proc_update_handler(struct ctl_table *table, int write,
			goto done;
		}
		update_min_nice = 1;
	} else {
		/* all tunables other than min_nice are in percentage */
	} else if (data != &sysctl_sched_select_prev_cpu_us) {
		/*
		 * all tunables other than min_nice and prev_cpu_us are
		 * in percentage.
		 */
		if (sysctl_sched_downmigrate_pct >
		    sysctl_sched_upmigrate_pct || *data > 100) {
			*data = old_val;
@@ -3824,6 +3879,7 @@ static inline void reset_balance_interval(int cpu)
static inline int migration_needed(struct task_struct *p, int cpu)
{
	int nice;
	struct related_thread_group *grp;

	if (!sched_enable_hmp || p->state != TASK_RUNNING)
		return 0;
@@ -3836,12 +3892,19 @@ static inline int migration_needed(struct task_struct *p, int cpu)
		return IRQLOAD_MIGRATION;

	nice = task_nice(p);
	if (!p->grp && (nice > sched_upmigrate_min_nice ||
		 upmigrate_discouraged(p)) && cpu_capacity(cpu) > min_capacity)
	rcu_read_lock();
	grp = task_related_thread_group(p);
	if (!grp && (nice > sched_upmigrate_min_nice ||
	       upmigrate_discouraged(p)) && cpu_capacity(cpu) > min_capacity) {
		rcu_read_unlock();
		return DOWN_MIGRATION;
	}

	if (!p->grp && !task_will_fit(p, cpu))
	if (!grp && !task_will_fit(p, cpu)) {
		rcu_read_unlock();
		return UP_MIGRATION;
	}
	rcu_read_unlock();

	return 0;
}
@@ -4004,7 +4067,7 @@ void init_new_task_load(struct task_struct *p)
	p->init_load_pct = 0;
	memset(&p->ravg, 0, sizeof(struct ravg));
	p->se.avg.decay_count	= 0;
	p->grp = NULL;
	rcu_assign_pointer(p->grp, NULL);
	INIT_LIST_HEAD(&p->grp_list);

	if (init_load_pct) {
+1 −1
Original line number Diff line number Diff line
@@ -1157,7 +1157,7 @@ static inline int sched_cpu_high_irqload(int cpu)
static inline
struct related_thread_group *task_related_thread_group(struct task_struct *p)
{
	return p->grp;
	return rcu_dereference(p->grp);
}

#else	/* CONFIG_SCHED_HMP */
Loading