Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 34e4e6c5 authored by Syed Rameez Mustafa
Browse files

Merge remote-tracking branch '318/dev/msm-3.18-sched' into msm-3.18



* 318/dev/msm-3.18-sched:
  sched: restrict sync wakee placement bias with waker's demand
  sched: add preference for waker cluster CPU in wakee task placement
  sched/core: Add protection against null-pointer dereference
  sched: allow select_prev_cpu_us to be set to values greater than 100us

Change-Id: I02831c21998a5e58e33d76e0496ea564ae4f0428
Signed-off-by: Syed Rameez Mustafa <rameezmustafa@codeaurora.org>
parents 2ab415c7 bb11725f
Loading
Loading
Loading
Loading
+23 −0
Original line number Diff line number Diff line
@@ -1280,6 +1280,29 @@ power-efficient cpu. We don't let it come back to a power-efficient cpu until
its demand *in reference to the power-efficient cpu* drops less than 60%
(sched_downmigrate).


*** 7.26 sched_small_wakee_task_load

Appears at: /proc/sys/kernel/sched_small_wakee_task_load

Default value: 10

This tunable is a percentage.  It configures the maximum demand of a small wakee task.
Sync wakee tasks which have demand less than sched_small_wakee_task_load are
categorized as small wakee tasks.  Scheduler places small wakee tasks on the
waker's cluster.


*** 7.27 sched_big_waker_task_load

Appears at: /proc/sys/kernel/sched_big_waker_task_load

Default value: 25

This tunable is a percentage.  It configures the minimum demand of a big sync waker
task.  Scheduler places small wakee tasks woken up by big sync waker on the
waker's cluster.

=========================
8. HMP SCHEDULER TRACE POINTS
=========================
+2 −0
Original line number Diff line number Diff line
@@ -67,6 +67,8 @@ extern unsigned int sysctl_sched_downmigrate_pct;
extern int sysctl_sched_upmigrate_min_nice;
extern unsigned int sysctl_sched_boost;
extern unsigned int sysctl_early_detection_duration;
extern unsigned int sysctl_sched_small_wakee_task_load_pct;
extern unsigned int sysctl_sched_big_waker_task_load_pct;

#ifdef CONFIG_SCHED_QHMP
extern unsigned int sysctl_sched_min_runtime;
+14 −9
Original line number Diff line number Diff line
@@ -3373,7 +3373,7 @@ static void remove_task_from_group(struct task_struct *p)

	rq = __task_rq_lock(p);
	list_del_init(&p->grp_list);
	p->grp = NULL;
	rcu_assign_pointer(p->grp, NULL);
	__task_rq_unlock(rq);

	if (!list_empty(&grp->tasks)) {
@@ -3403,7 +3403,7 @@ add_task_to_group(struct task_struct *p, struct related_thread_group *grp)
	 * reference of p->grp in various hot-paths
	 */
	rq = __task_rq_lock(p);
	p->grp = grp;
	rcu_assign_pointer(p->grp, grp);
	list_add(&p->grp_list, &grp->tasks);
	__task_rq_unlock(rq);

@@ -3472,12 +3472,13 @@ done:

unsigned int sched_get_group_id(struct task_struct *p)
{
	unsigned long flags;
	unsigned int group_id;
	struct related_thread_group *grp;

	raw_spin_lock_irqsave(&p->pi_lock, flags);
	group_id = p->grp ? p->grp->id : 0;
	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
	rcu_read_lock();
	grp = task_related_thread_group(p);
	group_id = grp ? grp->id : 0;
	rcu_read_unlock();

	return group_id;
}
@@ -3667,7 +3668,7 @@ static inline int update_preferred_cluster(struct related_thread_group *grp,
	 * has passed since we last updated preference
	 */
	if (abs(new_load - old_load) > sched_ravg_window / 4 ||
		sched_ktime_clock() - p->grp->last_update > sched_ravg_window)
		sched_ktime_clock() - grp->last_update > sched_ravg_window)
		return 1;

	return 0;
@@ -4401,15 +4402,17 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)

	raw_spin_lock(&rq->lock);
	old_load = task_load(p);
	grp = task_related_thread_group(p);
	wallclock = sched_ktime_clock();
	update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
	heavy_task = heavy_task_wakeup(p, rq, TASK_WAKE);
	update_task_ravg(p, rq, TASK_WAKE, wallclock, 0);
	raw_spin_unlock(&rq->lock);

	rcu_read_lock();
	grp = task_related_thread_group(p);
	if (update_preferred_cluster(grp, p, old_load))
		set_preferred_cluster(grp);
	rcu_read_unlock();

	p->sched_contributes_to_load = !!task_contributes_to_load(p);
	p->state = TASK_WAKING;
@@ -5358,7 +5361,6 @@ void scheduler_tick(void)

	raw_spin_lock(&rq->lock);
	old_load = task_load(curr);
	grp = task_related_thread_group(curr);
	set_window_start(rq);
	update_rq_clock(rq);
	curr->sched_class->task_tick(rq, curr, 0);
@@ -5380,8 +5382,11 @@ void scheduler_tick(void)
#endif
	rq_last_tick_reset(rq);

	rcu_read_lock();
	grp = task_related_thread_group(curr);
	if (update_preferred_cluster(grp, curr, old_load))
		set_preferred_cluster(grp);
	rcu_read_unlock();

	if (curr->sched_class == &fair_sched_class)
		check_for_migration(rq, curr);
+83 −20
Original line number Diff line number Diff line
@@ -2469,6 +2469,16 @@ unsigned int __read_mostly sysctl_sched_spill_nr_run = 10;
 */
unsigned int __read_mostly sysctl_sched_enable_power_aware = 0;

/*
 * Place sync wakee tasks that have less than the configured demand on the
 * waker's cluster.
 */
unsigned int __read_mostly sched_small_wakee_task_load;
unsigned int __read_mostly sysctl_sched_small_wakee_task_load_pct = 10;

unsigned int __read_mostly sched_big_waker_task_load;
unsigned int __read_mostly sysctl_sched_big_waker_task_load_pct = 25;

/*
 * CPUs with load greater than the sched_spill_load_threshold are not
 * eligible for task placement. When all CPUs in a cluster achieve a
@@ -2595,6 +2605,14 @@ void set_hmp_defaults(void)

	sched_short_sleep_task_threshold = sysctl_sched_select_prev_cpu_us *
					   NSEC_PER_USEC;

	sched_small_wakee_task_load =
		div64_u64((u64)sysctl_sched_small_wakee_task_load_pct *
			  (u64)sched_ravg_window, 100);

	sched_big_waker_task_load =
		div64_u64((u64)sysctl_sched_big_waker_task_load_pct *
			  (u64)sched_ravg_window, 100);
}

u32 sched_get_init_task_load(struct task_struct *p)
@@ -2871,6 +2889,7 @@ struct cpu_select_env {
	struct related_thread_group *rtg;
	u8 reason;
	u8 need_idle:1;
	u8 need_waker_cluster:1;
	u8 boost:1;
	u8 sync:1;
	u8 ignore_prev_cpu:1;
@@ -2910,7 +2929,7 @@ preferred_cluster(struct sched_cluster *cluster, struct task_struct *p)

	rcu_read_lock();

	grp = p->grp;
	grp = task_related_thread_group(p);
	if (!grp || !sysctl_sched_enable_colocation)
		rc = 1;
	else
@@ -3036,15 +3055,29 @@ next_candidate(const unsigned long *list, int start, int end)
	return sched_cluster[cluster_id];
}

static void update_spare_capacity(
struct cluster_cpu_stats *stats, int cpu, int capacity, u64 cpu_load)
static void
update_spare_capacity(struct cluster_cpu_stats *stats,
		      struct cpu_select_env *env, int cpu, int capacity,
		      u64 cpu_load)
{
	s64 spare_capacity = sched_ravg_window - cpu_load;

	if (spare_capacity > 0 &&
	    (spare_capacity > stats->highest_spare_capacity ||
	     (spare_capacity == stats->highest_spare_capacity &&
	     capacity > cpu_capacity(stats->best_capacity_cpu)))) {
	      ((!env->need_waker_cluster &&
		capacity > cpu_capacity(stats->best_capacity_cpu)) ||
	       (env->need_waker_cluster &&
		cpu_rq(cpu)->nr_running <
		cpu_rq(stats->best_capacity_cpu)->nr_running))))) {
		/*
		 * If sync waker is the only runnable of CPU, cr_avg of the
		 * CPU is 0 so we have high chance to place the wakee on the
	 * waker's CPU which likely causes preemption of the waker.
	 * This can lead to migration of the preempted waker.  Place the
		 * wakee on the real idle CPU when it's possible by checking
		 * nr_running to avoid such preemption.
		 */
		stats->highest_spare_capacity = spare_capacity;
		stats->best_capacity_cpu = cpu;
	}
@@ -3064,7 +3097,7 @@ struct cpu_select_env *env, struct cluster_cpu_stats *stats)
			sched_irqload(i), power_cost(i, task_load(env->p) +
					cpu_cravg_sync(i, env->sync)), 0);

			update_spare_capacity(stats, i, next->capacity,
			update_spare_capacity(stats, env, i, next->capacity,
					  cpu_load_sync(i, env->sync));
		}
	}
@@ -3162,9 +3195,11 @@ static void find_best_cpu_in_cluster(struct sched_cluster *c,
		if (unlikely(!cpu_active(i)) || skip_cpu(i, env))
			continue;

		update_spare_capacity(stats, i, c->capacity, env->cpu_load);
		update_spare_capacity(stats, env, i, c->capacity,
				      env->cpu_load);

		if (env->boost || sched_cpu_high_irqload(i) ||
		if (env->boost || env->need_waker_cluster ||
		    sched_cpu_high_irqload(i) ||
		    spill_threshold_crossed(env, cpu_rq(i)))
			continue;

@@ -3235,7 +3270,7 @@ bias_to_prev_cpu(struct cpu_select_env *env, struct cluster_cpu_stats *stats)
	env->cpu_load = cpu_load_sync(prev_cpu, env->sync);
	if (sched_cpu_high_irqload(prev_cpu) ||
			spill_threshold_crossed(env, cpu_rq(prev_cpu))) {
		update_spare_capacity(stats, prev_cpu,
		update_spare_capacity(stats, env, prev_cpu,
				cluster->capacity, env->cpu_load);
		env->ignore_prev_cpu = 1;
		return false;
@@ -3244,6 +3279,14 @@ bias_to_prev_cpu(struct cpu_select_env *env, struct cluster_cpu_stats *stats)
	return true;
}

/*
 * Decide whether a sync wakeup should bias the wakee toward the waker's
 * cluster.  True only when this is a plain sync wakeup (no need_idle
 * request and no special placement reason), the waker ("current") is a
 * big task (demand above sched_big_waker_task_load) and the wakee is a
 * small task (demand below sched_small_wakee_task_load).  Both
 * thresholds are derived from the sched_big_waker_task_load_pct /
 * sched_small_wakee_task_load_pct sysctl tunables in set_hmp_defaults().
 */
static inline bool
wake_to_waker_cluster(struct cpu_select_env *env)
{
	return !env->need_idle && !env->reason && env->sync &&
	       task_load(current) > sched_big_waker_task_load &&
	       task_load(env->p) < sched_small_wakee_task_load;
}

static inline int
cluster_allowed(struct task_struct *p, struct sched_cluster *cluster)
{
@@ -3269,6 +3312,7 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
		.p			= p,
		.reason			= reason,
		.need_idle		= wake_to_idle(p),
		.need_waker_cluster	= 0,
		.boost			= sched_boost(),
		.sync			= sync,
		.prev_cpu		= target,
@@ -3283,7 +3327,7 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,

	rcu_read_lock();

	grp = p->grp;
	grp = task_related_thread_group(p);

	if (grp && grp->preferred_cluster) {
		pref_cluster = grp->preferred_cluster;
@@ -3291,10 +3335,18 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
			clear_bit(pref_cluster->id, env.candidate_list);
		else
			env.rtg = grp;
	} else {
		cluster = cpu_rq(smp_processor_id())->cluster;
		if (wake_to_waker_cluster(&env) &&
		    cluster_allowed(p, cluster)) {
			env.need_waker_cluster = 1;
			bitmap_zero(env.candidate_list, NR_CPUS);
			__set_bit(cluster->id, env.candidate_list);
		} else if (bias_to_prev_cpu(&env, &stats)) {
			fast_path = true;
			goto out;
		}
	}

retry:
	cluster = select_least_power_cluster(&env);
@@ -3763,8 +3815,11 @@ int sched_hmp_proc_update_handler(struct ctl_table *table, int write,
			goto done;
		}
		update_min_nice = 1;
	} else {
		/* all tunables other than min_nice are in percentage */
	} else if (data != &sysctl_sched_select_prev_cpu_us) {
		/*
		 * all tunables other than min_nice and prev_cpu_us are
		 * in percentage.
		 */
		if (sysctl_sched_downmigrate_pct >
		    sysctl_sched_upmigrate_pct || *data > 100) {
			*data = old_val;
@@ -3824,6 +3879,7 @@ static inline void reset_balance_interval(int cpu)
static inline int migration_needed(struct task_struct *p, int cpu)
{
	int nice;
	struct related_thread_group *grp;

	if (!sched_enable_hmp || p->state != TASK_RUNNING)
		return 0;
@@ -3836,12 +3892,19 @@ static inline int migration_needed(struct task_struct *p, int cpu)
		return IRQLOAD_MIGRATION;

	nice = task_nice(p);
	if (!p->grp && (nice > sched_upmigrate_min_nice ||
		 upmigrate_discouraged(p)) && cpu_capacity(cpu) > min_capacity)
	rcu_read_lock();
	grp = task_related_thread_group(p);
	if (!grp && (nice > sched_upmigrate_min_nice ||
	       upmigrate_discouraged(p)) && cpu_capacity(cpu) > min_capacity) {
		rcu_read_unlock();
		return DOWN_MIGRATION;
	}

	if (!p->grp && !task_will_fit(p, cpu))
	if (!grp && !task_will_fit(p, cpu)) {
		rcu_read_unlock();
		return UP_MIGRATION;
	}
	rcu_read_unlock();

	return 0;
}
@@ -4004,7 +4067,7 @@ void init_new_task_load(struct task_struct *p)
	p->init_load_pct = 0;
	memset(&p->ravg, 0, sizeof(struct ravg));
	p->se.avg.decay_count	= 0;
	p->grp = NULL;
	rcu_assign_pointer(p->grp, NULL);
	INIT_LIST_HEAD(&p->grp_list);

	if (init_load_pct) {
+1 −1
Original line number Diff line number Diff line
@@ -1157,7 +1157,7 @@ static inline int sched_cpu_high_irqload(int cpu)
static inline
struct related_thread_group *task_related_thread_group(struct task_struct *p)
{
	return p->grp;
	return rcu_dereference(p->grp);
}

#else	/* CONFIG_SCHED_HMP */
Loading