
Commit dfc367a3 authored by Srivatsa Vaddagiri, committed by Syed Rameez Mustafa

sched: colocate related threads



Provide a userspace interface for tasks to be grouped together as
"related" threads. For example, all threads involved in updating the
display buffer could be tagged as related.

The scheduler will attempt to give a group of related threads special
treatment, such as:

1) Colocation of related threads in the same "preferred" cluster
2) Aggregation of the group's demand when determining cluster frequency

This patch extends the scheduler to provide best-effort colocation support
for a group of related threads.

Change-Id: Ic2cd769faf5da4d03a8f3cb0ada6224d0101a5f5
Signed-off-by: Srivatsa Vaddagiri <vatsa@codeaurora.org>
Signed-off-by: Syed Rameez Mustafa <rameezmustafa@codeaurora.org>
parent 0fc21b2b
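
In practice, userspace tags a task by writing a non-zero group id to the /proc/<pid>/sched_group_id file added in this patch; writing 0 removes the task from its group (sched_exit() does the same when a task exits), and colocation as a whole can be turned off through the new sysctl_sched_enable_colocation knob. A minimal usage sketch, not part of the patch itself (the group id 5 is an arbitrary example):

#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char path[64];
	FILE *f;

	/* Tag this process as a member of related-thread group 5. */
	snprintf(path, sizeof(path), "/proc/%d/sched_group_id", (int)getpid());
	f = fopen(path, "w");
	if (!f)
		return 1;
	fprintf(f, "5\n");	/* parsed by kstrtoint() in sched_group_id_write() */
	fclose(f);

	/* Reading the same file back reports the current group id. */
	return 0;
}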
+66 −0
@@ -1354,6 +1354,69 @@ static const struct file_operations proc_pid_sched_init_task_load_operations = {
	.release	= single_release,
};

#ifndef CONFIG_SCHED_QHMP
static int sched_group_id_show(struct seq_file *m, void *v)
{
	struct inode *inode = m->private;
	struct task_struct *p;

	p = get_proc_task(inode);
	if (!p)
		return -ESRCH;

	seq_printf(m, "%d\n", sched_get_group_id(p));

	put_task_struct(p);

	return 0;
}

static ssize_t
sched_group_id_write(struct file *file, const char __user *buf,
	    size_t count, loff_t *offset)
{
	struct inode *inode = file_inode(file);
	struct task_struct *p;
	char buffer[PROC_NUMBUF];
	int group_id, err;

	memset(buffer, 0, sizeof(buffer));
	if (count > sizeof(buffer) - 1)
		count = sizeof(buffer) - 1;
	if (copy_from_user(buffer, buf, count)) {
		err = -EFAULT;
		goto out;
	}

	err = kstrtoint(strstrip(buffer), 0, &group_id);
	if (err)
		goto out;

	p = get_proc_task(inode);
	if (!p)
		return -ESRCH;

	err = sched_set_group_id(p, group_id);

	put_task_struct(p);

out:
	return err < 0 ? err : count;
}

static int sched_group_id_open(struct inode *inode, struct file *filp)
{
	return single_open(filp, sched_group_id_show, inode);
}

static const struct file_operations proc_pid_sched_group_id_operations = {
	.open		= sched_group_id_open,
	.read		= seq_read,
	.write		= sched_group_id_write,
	.llseek		= seq_lseek,
	.release	= single_release,
};
#endif  /* !CONFIG_SCHED_QHMP */
#endif	/* CONFIG_SCHED_HMP */

#ifdef CONFIG_SCHED_AUTOGROUP
@@ -2696,6 +2759,9 @@ static const struct pid_entry tgid_base_stuff[] = {
#endif
#ifdef CONFIG_SCHED_HMP
	REG("sched_init_task_load",      S_IRUGO|S_IWUSR, proc_pid_sched_init_task_load_operations),
#ifndef CONFIG_SCHED_QHMP
	REG("sched_group_id",      S_IRUGO|S_IWUSR, proc_pid_sched_group_id_operations),
#endif
#endif
#ifdef CONFIG_SCHED_DEBUG
	REG("sched",      S_IRUGO|S_IWUSR, proc_pid_sched_operations),
+4 −0
@@ -1319,6 +1319,8 @@ struct task_struct {
#ifdef CONFIG_SCHED_QHMP
	u64 run_start;
#endif
	struct related_thread_group *grp;
	struct list_head grp_list;
#endif
#ifdef CONFIG_CGROUP_SCHED
	struct task_group *sched_task_group;
@@ -2198,6 +2200,8 @@ static inline void sched_set_cluster_dstate(const cpumask_t *cluster_cpus,

extern int sched_set_wake_up_idle(struct task_struct *p, int wake_up_idle);
extern u32 sched_get_wake_up_idle(struct task_struct *p);
extern int sched_set_group_id(struct task_struct *p, unsigned int group_id);
extern unsigned int sched_get_group_id(struct task_struct *p);

#ifdef CONFIG_SCHED_HMP

+1 −0
@@ -75,6 +75,7 @@ extern unsigned int sysctl_sched_small_task_pct;
extern unsigned int sysctl_sched_lowspill_freq;
extern unsigned int sysctl_sched_pack_freq;
extern unsigned int sysctl_sched_select_prev_cpu_us;
extern unsigned int sysctl_sched_enable_colocation;
#if defined(CONFIG_SCHED_FREQ_INPUT)
extern unsigned int sysctl_sched_new_task_windows;
#endif
+25 −0
@@ -154,6 +154,31 @@ TRACE_EVENT(sched_task_load,
		__entry->best_cpu, __entry->latency)
);

TRACE_EVENT(sched_set_preferred_cluster,

	TP_PROTO(struct related_thread_group *grp, u64 total_demand),

	TP_ARGS(grp, total_demand),

	TP_STRUCT__entry(
		__field(		int,	id			)
		__field(		u64,	demand			)
		__field(		int,	cluster_first_cpu	)
	),

	TP_fast_assign(
		__entry->id			= grp->id;
		__entry->demand			= total_demand;
		__entry->cluster_first_cpu	= grp->preferred_cluster ?
							cluster_first_cpu(grp->preferred_cluster)
							: -1;
	),

	TP_printk("group_id %d total_demand %llu preferred_cluster_first_cpu %d",
			__entry->id, __entry->demand,
			__entry->cluster_first_cpu)
);

DECLARE_EVENT_CLASS(sched_cpu_load,

	TP_PROTO(struct rq *rq, int idle, u64 irqload, unsigned int power_cost, int temp),
+238 −0
@@ -1654,6 +1654,8 @@ __read_mostly unsigned int sysctl_sched_account_wait_time = 1;

__read_mostly unsigned int sysctl_sched_cpu_high_irqload = (10 * NSEC_PER_MSEC);

unsigned int __read_mostly sysctl_sched_enable_colocation = 1;

#ifdef CONFIG_SCHED_FREQ_INPUT

static __read_mostly unsigned int sched_migration_fixup = 1;
@@ -2578,6 +2580,8 @@ void sched_exit(struct task_struct *p)
	struct rq *rq = cpu_rq(cpu);
	u64 wallclock;

	sched_set_group_id(p, 0);

	raw_spin_lock_irqsave(&rq->lock, flags);
	/* rq->curr == p */
	wallclock = sched_ktime_clock();
@@ -2982,6 +2986,206 @@ static void check_for_up_down_migrate_update(const struct cpumask *cpus)
	update_up_down_migrate();
}

static LIST_HEAD(related_thread_groups);
static DEFINE_RWLOCK(related_thread_group_lock);
static int nr_related_thread_groups;
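
/*
 * Not part of this patch: struct related_thread_group is defined in a
 * scheduler header outside the hunks shown here. Judging from the fields
 * referenced below, its definition presumably looks roughly like the
 * following sketch:
 *
 *	struct related_thread_group {
 *		int id;				  group id set via sched_set_group_id()
 *		raw_spinlock_t lock;		  protects tasks and preferred_cluster
 *		struct list_head tasks;		  member tasks, linked via p->grp_list
 *		struct list_head list;		  node on the related_thread_groups list
 *		struct sched_cluster *preferred_cluster;
 *		u64 last_update;		  last time the preference was recomputed
 *		struct rcu_head rcu;		  used by kfree_rcu() once the group empties
 *	};
 */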

/* Return cluster which can offer required capacity for group */
static struct sched_cluster *
best_cluster(struct related_thread_group *grp, u64 total_demand)
{
	struct sched_cluster *cluster = NULL;

	for_each_sched_cluster(cluster) {
		if (group_will_fit(cluster, grp, total_demand))
			return cluster;
	}

	return NULL;
}

static void _set_preferred_cluster(struct related_thread_group *grp)
{
	struct task_struct *p;
	u64 combined_demand = 0;

	if (!sysctl_sched_enable_colocation) {
		grp->last_update = sched_ktime_clock();
		grp->preferred_cluster = NULL;
		return;
	}

	/*
	 * wakeup of two or more related tasks could race with each other and
	 * could result in multiple calls to _set_preferred_cluster being issued
	 * at same time. Avoid overhead in such cases of rechecking preferred
	 * cluster
	 */
	if (sched_ktime_clock() - grp->last_update < sched_ravg_window / 10)
		return;

	list_for_each_entry(p, &grp->tasks, grp_list)
		combined_demand += p->ravg.demand;

	grp->preferred_cluster = best_cluster(grp, combined_demand);
	grp->last_update = sched_ktime_clock();
	trace_sched_set_preferred_cluster(grp, combined_demand);
}

static void set_preferred_cluster(struct related_thread_group *grp)
{
	raw_spin_lock(&grp->lock);
	_set_preferred_cluster(grp);
	raw_spin_unlock(&grp->lock);
}

struct related_thread_group *alloc_related_thread_group(int group_id)
{
	struct related_thread_group *grp;

	grp = kzalloc(sizeof(*grp), GFP_KERNEL);
	if (!grp)
		return ERR_PTR(-ENOMEM);

	grp->id = group_id;
	INIT_LIST_HEAD(&grp->tasks);
	INIT_LIST_HEAD(&grp->list);
	raw_spin_lock_init(&grp->lock);

	return grp;
}

struct related_thread_group *lookup_related_thread_group(unsigned int group_id)
{
	struct related_thread_group *grp;

	list_for_each_entry(grp, &related_thread_groups, list) {
		if (grp->id == group_id)
			return grp;
	}

	return NULL;
}

static void remove_task_from_group(struct task_struct *p)
{
	struct related_thread_group *grp = p->grp;
	struct rq *rq;
	int empty_group = 1;

	raw_spin_lock(&grp->lock);

	rq = __task_rq_lock(p);
	list_del_init(&p->grp_list);
	p->grp = NULL;
	__task_rq_unlock(rq);

	if (!list_empty(&grp->tasks)) {
		empty_group = 0;
		_set_preferred_cluster(grp);
	}

	raw_spin_unlock(&grp->lock);

	if (empty_group) {
		list_del(&grp->list);
		nr_related_thread_groups--;
		/* See comments before preferred_cluster() */
		kfree_rcu(grp, rcu);
	}
}

static int
add_task_to_group(struct task_struct *p, struct related_thread_group *grp)
{
	struct rq *rq;

	raw_spin_lock(&grp->lock);

	/*
	 * Change p->grp under rq->lock. Will prevent races with read-side
	 * reference of p->grp in various hot-paths
	 */
	rq = __task_rq_lock(p);
	p->grp = grp;
	list_add(&p->grp_list, &grp->tasks);
	__task_rq_unlock(rq);

	_set_preferred_cluster(grp);

	raw_spin_unlock(&grp->lock);

	return 0;
}

int sched_set_group_id(struct task_struct *p, unsigned int group_id)
{
	int rc = 0, destroy = 0;
	unsigned long flags;
	struct related_thread_group *grp = NULL, *new = NULL;

redo:
	raw_spin_lock_irqsave(&p->pi_lock, flags);

	if ((current != p && p->flags & PF_EXITING) ||
			(!p->grp && !group_id) ||
			(p->grp && p->grp->id == group_id))
		goto done;

	write_lock(&related_thread_group_lock);

	if (!group_id) {
		remove_task_from_group(p);
		write_unlock(&related_thread_group_lock);
		goto done;
	}

	if (p->grp && p->grp->id != group_id)
		remove_task_from_group(p);

	grp = lookup_related_thread_group(group_id);
	if (!grp && !new) {
		/* New group */
		write_unlock(&related_thread_group_lock);
		raw_spin_unlock_irqrestore(&p->pi_lock, flags);
		new = alloc_related_thread_group(group_id);
		if (IS_ERR(new))
			return -ENOMEM;
		destroy = 1;
		/* Rerun checks (like task exiting), since we dropped pi_lock */
		goto redo;
	} else if (!grp && new) {
		/* New group - use object allocated before */
		destroy = 0;
		nr_related_thread_groups++;
		list_add(&new->list, &related_thread_groups);
		grp = new;
	}

	BUG_ON(!grp);
	rc = add_task_to_group(p, grp);
	write_unlock(&related_thread_group_lock);
done:
	raw_spin_unlock_irqrestore(&p->pi_lock, flags);

	if (destroy)
		kfree(new);

	return rc;
}

unsigned int sched_get_group_id(struct task_struct *p)
{
	unsigned long flags;
	unsigned int group_id;

	raw_spin_lock_irqsave(&p->pi_lock, flags);
	group_id = p->grp ? p->grp->id : 0;
	raw_spin_unlock_irqrestore(&p->pi_lock, flags);

	return group_id;
}

static int cpufreq_notifier_policy(struct notifier_block *nb,
		unsigned long val, void *data)
{
@@ -3164,6 +3368,25 @@ static void restore_orig_mark_start(struct task_struct *p, u64 mark_start)
	p->ravg.mark_start = mark_start;
}

static inline int update_preferred_cluster(struct related_thread_group *grp,
		struct task_struct *p, u32 old_load)
{
	u32 new_load = task_load(p);

	if (!grp)
		return 0;

	/*
	 * Update if task's load has changed significantly or a complete window
	 * has passed since we last updated preference
	 */
	if (abs(new_load - old_load) > sched_ravg_window / 4 ||
		sched_ktime_clock() - p->grp->last_update > sched_ravg_window)
		return 1;

	return 0;
}

#else	/* CONFIG_SCHED_HMP */

static inline void fixup_busy_time(struct task_struct *p, int new_cpu) { }
@@ -3856,8 +4079,10 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
	struct migration_notify_data mnd;
	int heavy_task = 0;
#ifdef CONFIG_SMP
	unsigned int old_load;
	struct rq *rq;
	u64 wallclock;
	struct related_thread_group *grp = NULL;
#endif

	/*
@@ -3893,12 +4118,17 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
	rq = cpu_rq(task_cpu(p));

	raw_spin_lock(&rq->lock);
	old_load = task_load(p);
	grp = task_related_thread_group(p);
	wallclock = sched_ktime_clock();
	update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
	heavy_task = heavy_task_wakeup(p, rq, TASK_WAKE);
	update_task_ravg(p, rq, TASK_WAKE, wallclock, 0);
	raw_spin_unlock(&rq->lock);

	if (update_preferred_cluster(grp, p, old_load))
		set_preferred_cluster(grp);

	p->sched_contributes_to_load = !!task_contributes_to_load(p);
	p->state = TASK_WAKING;

@@ -4799,10 +5029,14 @@ void scheduler_tick(void)
	struct task_struct *curr = rq->curr;
	u64 wallclock;
	bool early_notif;
	u32 old_load;
	struct related_thread_group *grp;

	sched_clock_tick();

	raw_spin_lock(&rq->lock);
	old_load = task_load(curr);
	grp = task_related_thread_group(curr);
	set_window_start(rq);
	update_rq_clock(rq);
	curr->sched_class->task_tick(rq, curr, 0);
@@ -4823,6 +5057,10 @@ void scheduler_tick(void)
	trigger_load_balance(rq);
#endif
	rq_last_tick_reset(rq);

	if (update_preferred_cluster(grp, curr, old_load))
		set_preferred_cluster(grp);

	if (curr->sched_class == &fair_sched_class)
		check_for_migration(rq, curr);
}