
Commit dfc367a3 authored by Srivatsa Vaddagiri, committed by Syed Rameez Mustafa

sched: colocate related threads



Provide a userspace interface for tasks to be grouped together as
"related" threads. For example, all threads involved in updating the
display buffer could be tagged as related.

The scheduler will attempt to give a group of related threads special
treatment, such as:

1) Colocation of related threads in the same "preferred" cluster
2) Aggregation of the group's demand when determining cluster frequency

This patch extends the scheduler to provide best-effort colocation support
for a group of related threads.

Change-Id: Ic2cd769faf5da4d03a8f3cb0ada6224d0101a5f5
Signed-off-by: Srivatsa Vaddagiri <vatsa@codeaurora.org>
Signed-off-by: Syed Rameez Mustafa <rameezmustafa@codeaurora.org>
parent 0fc21b2b
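
In practice, userspace tags a task by writing a non-zero group id to the /proc/<pid>/sched_group_id file added in this patch; writing 0 removes the task from its group (sched_exit() does the same when a task exits), and colocation as a whole can be turned off through the new sysctl_sched_enable_colocation knob. A minimal usage sketch, not part of the patch itself (the group id 5 is an arbitrary example):

#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char path[64];
	FILE *f;

	/* Tag this process as a member of related-thread group 5. */
	snprintf(path, sizeof(path), "/proc/%d/sched_group_id", (int)getpid());
	f = fopen(path, "w");
	if (!f)
		return 1;
	fprintf(f, "5\n");	/* parsed by kstrtoint() in sched_group_id_write() */
	fclose(f);

	/* Reading the same file back reports the current group id. */
	return 0;
}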
+66 −0
@@ -1354,6 +1354,69 @@ static const struct file_operations proc_pid_sched_init_task_load_operations = {
	.release	= single_release,
};

#ifndef CONFIG_SCHED_QHMP
static int sched_group_id_show(struct seq_file *m, void *v)
{
	struct inode *inode = m->private;
	struct task_struct *p;

	p = get_proc_task(inode);
	if (!p)
		return -ESRCH;

	seq_printf(m, "%d\n", sched_get_group_id(p));

	put_task_struct(p);

	return 0;
}

static ssize_t
sched_group_id_write(struct file *file, const char __user *buf,
	    size_t count, loff_t *offset)
{
	struct inode *inode = file_inode(file);
	struct task_struct *p;
	char buffer[PROC_NUMBUF];
	int group_id, err;

	memset(buffer, 0, sizeof(buffer));
	if (count > sizeof(buffer) - 1)
		count = sizeof(buffer) - 1;
	if (copy_from_user(buffer, buf, count)) {
		err = -EFAULT;
		goto out;
	}

	err = kstrtoint(strstrip(buffer), 0, &group_id);
	if (err)
		goto out;

	p = get_proc_task(inode);
	if (!p)
		return -ESRCH;

	err = sched_set_group_id(p, group_id);

	put_task_struct(p);

out:
	return err < 0 ? err : count;
}

static int sched_group_id_open(struct inode *inode, struct file *filp)
{
	return single_open(filp, sched_group_id_show, inode);
}

static const struct file_operations proc_pid_sched_group_id_operations = {
	.open		= sched_group_id_open,
	.read		= seq_read,
	.write		= sched_group_id_write,
	.llseek		= seq_lseek,
	.release	= single_release,
};
#endif  /* !CONFIG_SCHED_QHMP */
#endif	/* CONFIG_SCHED_HMP */

#ifdef CONFIG_SCHED_AUTOGROUP
@@ -2696,6 +2759,9 @@ static const struct pid_entry tgid_base_stuff[] = {
#endif
#ifdef CONFIG_SCHED_HMP
	REG("sched_init_task_load",      S_IRUGO|S_IWUSR, proc_pid_sched_init_task_load_operations),
#ifndef CONFIG_SCHED_QHMP
	REG("sched_group_id",      S_IRUGO|S_IWUSR, proc_pid_sched_group_id_operations),
#endif
#endif
#ifdef CONFIG_SCHED_DEBUG
	REG("sched",      S_IRUGO|S_IWUSR, proc_pid_sched_operations),
+4 −0
@@ -1319,6 +1319,8 @@ struct task_struct {
#ifdef CONFIG_SCHED_QHMP
	u64 run_start;
#endif
	struct related_thread_group *grp;
	struct list_head grp_list;
#endif
#ifdef CONFIG_CGROUP_SCHED
	struct task_group *sched_task_group;
@@ -2198,6 +2200,8 @@ static inline void sched_set_cluster_dstate(const cpumask_t *cluster_cpus,

extern int sched_set_wake_up_idle(struct task_struct *p, int wake_up_idle);
extern u32 sched_get_wake_up_idle(struct task_struct *p);
extern int sched_set_group_id(struct task_struct *p, unsigned int group_id);
extern unsigned int sched_get_group_id(struct task_struct *p);

#ifdef CONFIG_SCHED_HMP

+1 −0
@@ -75,6 +75,7 @@ extern unsigned int sysctl_sched_small_task_pct;
extern unsigned int sysctl_sched_lowspill_freq;
extern unsigned int sysctl_sched_pack_freq;
extern unsigned int sysctl_sched_select_prev_cpu_us;
extern unsigned int sysctl_sched_enable_colocation;
#if defined(CONFIG_SCHED_FREQ_INPUT)
extern unsigned int sysctl_sched_new_task_windows;
#endif
+25 −0
@@ -154,6 +154,31 @@ TRACE_EVENT(sched_task_load,
		__entry->best_cpu, __entry->latency)
);

TRACE_EVENT(sched_set_preferred_cluster,

	TP_PROTO(struct related_thread_group *grp, u64 total_demand),

	TP_ARGS(grp, total_demand),

	TP_STRUCT__entry(
		__field(		int,	id			)
		__field(		u64,	demand			)
		__field(		int,	cluster_first_cpu	)
	),

	TP_fast_assign(
		__entry->id			= grp->id;
		__entry->demand			= total_demand;
		__entry->cluster_first_cpu	= grp->preferred_cluster ?
							cluster_first_cpu(grp->preferred_cluster)
							: -1;
	),

	TP_printk("group_id %d total_demand %llu preferred_cluster_first_cpu %d",
			__entry->id, __entry->demand,
			__entry->cluster_first_cpu)
);

DECLARE_EVENT_CLASS(sched_cpu_load,

	TP_PROTO(struct rq *rq, int idle, u64 irqload, unsigned int power_cost, int temp),
+238 −0
@@ -1654,6 +1654,8 @@ __read_mostly unsigned int sysctl_sched_account_wait_time = 1;

__read_mostly unsigned int sysctl_sched_cpu_high_irqload = (10 * NSEC_PER_MSEC);

unsigned int __read_mostly sysctl_sched_enable_colocation = 1;

#ifdef CONFIG_SCHED_FREQ_INPUT

static __read_mostly unsigned int sched_migration_fixup = 1;
@@ -2578,6 +2580,8 @@ void sched_exit(struct task_struct *p)
	struct rq *rq = cpu_rq(cpu);
	u64 wallclock;

	sched_set_group_id(p, 0);

	raw_spin_lock_irqsave(&rq->lock, flags);
	/* rq->curr == p */
	wallclock = sched_ktime_clock();
@@ -2982,6 +2986,206 @@ static void check_for_up_down_migrate_update(const struct cpumask *cpus)
	update_up_down_migrate();
}

static LIST_HEAD(related_thread_groups);
static DEFINE_RWLOCK(related_thread_group_lock);
static int nr_related_thread_groups;
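
/*
 * Not part of this patch: struct related_thread_group is defined in a
 * scheduler header outside the hunks shown here. Judging from the fields
 * referenced below, its definition presumably looks roughly like the
 * following sketch:
 *
 *	struct related_thread_group {
 *		int id;				  group id set via sched_set_group_id()
 *		raw_spinlock_t lock;		  protects tasks and preferred_cluster
 *		struct list_head tasks;		  member tasks, linked via p->grp_list
 *		struct list_head list;		  node on the related_thread_groups list
 *		struct sched_cluster *preferred_cluster;
 *		u64 last_update;		  last time the preference was recomputed
 *		struct rcu_head rcu;		  used by kfree_rcu() once the group empties
 *	};
 */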

/* Return cluster which can offer required capacity for group */
static struct sched_cluster *
best_cluster(struct related_thread_group *grp, u64 total_demand)
{
	struct sched_cluster *cluster = NULL;

	for_each_sched_cluster(cluster) {
		if (group_will_fit(cluster, grp, total_demand))
			return cluster;
	}

	return NULL;
}

static void _set_preferred_cluster(struct related_thread_group *grp)
{
	struct task_struct *p;
	u64 combined_demand = 0;

	if (!sysctl_sched_enable_colocation) {
		grp->last_update = sched_ktime_clock();
		grp->preferred_cluster = NULL;
		return;
	}

	/*
	 * wakeup of two or more related tasks could race with each other and
	 * could result in multiple calls to _set_preferred_cluster being issued
	 * at same time. Avoid overhead in such cases of rechecking preferred
	 * cluster
	 */
	if (sched_ktime_clock() - grp->last_update < sched_ravg_window / 10)
		return;

	list_for_each_entry(p, &grp->tasks, grp_list)
		combined_demand += p->ravg.demand;

	grp->preferred_cluster = best_cluster(grp, combined_demand);
	grp->last_update = sched_ktime_clock();
	trace_sched_set_preferred_cluster(grp, combined_demand);
}

static void set_preferred_cluster(struct related_thread_group *grp)
{
	raw_spin_lock(&grp->lock);
	_set_preferred_cluster(grp);
	raw_spin_unlock(&grp->lock);
}

struct related_thread_group *alloc_related_thread_group(int group_id)
{
	struct related_thread_group *grp;

	grp = kzalloc(sizeof(*grp), GFP_KERNEL);
	if (!grp)
		return ERR_PTR(-ENOMEM);

	grp->id = group_id;
	INIT_LIST_HEAD(&grp->tasks);
	INIT_LIST_HEAD(&grp->list);
	raw_spin_lock_init(&grp->lock);

	return grp;
}

struct related_thread_group *lookup_related_thread_group(unsigned int group_id)
{
	struct related_thread_group *grp;

	list_for_each_entry(grp, &related_thread_groups, list) {
		if (grp->id == group_id)
			return grp;
	}

	return NULL;
}

static void remove_task_from_group(struct task_struct *p)
{
	struct related_thread_group *grp = p->grp;
	struct rq *rq;
	int empty_group = 1;

	raw_spin_lock(&grp->lock);

	rq = __task_rq_lock(p);
	list_del_init(&p->grp_list);
	p->grp = NULL;
	__task_rq_unlock(rq);

	if (!list_empty(&grp->tasks)) {
		empty_group = 0;
		_set_preferred_cluster(grp);
	}

	raw_spin_unlock(&grp->lock);

	if (empty_group) {
		list_del(&grp->list);
		nr_related_thread_groups--;
		/* See comments before preferred_cluster() */
		kfree_rcu(grp, rcu);
	}
}

static int
add_task_to_group(struct task_struct *p, struct related_thread_group *grp)
{
	struct rq *rq;

	raw_spin_lock(&grp->lock);

	/*
	 * Change p->grp under rq->lock. Will prevent races with read-side
	 * reference of p->grp in various hot-paths
	 */
	rq = __task_rq_lock(p);
	p->grp = grp;
	list_add(&p->grp_list, &grp->tasks);
	__task_rq_unlock(rq);

	_set_preferred_cluster(grp);

	raw_spin_unlock(&grp->lock);

	return 0;
}

int sched_set_group_id(struct task_struct *p, unsigned int group_id)
{
	int rc = 0, destroy = 0;
	unsigned long flags;
	struct related_thread_group *grp = NULL, *new = NULL;

redo:
	raw_spin_lock_irqsave(&p->pi_lock, flags);

	if ((current != p && p->flags & PF_EXITING) ||
			(!p->grp && !group_id) ||
			(p->grp && p->grp->id == group_id))
		goto done;

	write_lock(&related_thread_group_lock);

	if (!group_id) {
		remove_task_from_group(p);
		write_unlock(&related_thread_group_lock);
		goto done;
	}

	if (p->grp && p->grp->id != group_id)
		remove_task_from_group(p);

	grp = lookup_related_thread_group(group_id);
	if (!grp && !new) {
		/* New group */
		write_unlock(&related_thread_group_lock);
		raw_spin_unlock_irqrestore(&p->pi_lock, flags);
		new = alloc_related_thread_group(group_id);
		if (IS_ERR(new))
			return -ENOMEM;
		destroy = 1;
		/* Rerun checks (like task exiting), since we dropped pi_lock */
		goto redo;
	} else if (!grp && new) {
		/* New group - use object allocated before */
		destroy = 0;
		nr_related_thread_groups++;
		list_add(&new->list, &related_thread_groups);
		grp = new;
	}

	BUG_ON(!grp);
	rc = add_task_to_group(p, grp);
	write_unlock(&related_thread_group_lock);
done:
	raw_spin_unlock_irqrestore(&p->pi_lock, flags);

	if (destroy)
		kfree(new);

	return rc;
}

unsigned int sched_get_group_id(struct task_struct *p)
{
	unsigned long flags;
	unsigned int group_id;

	raw_spin_lock_irqsave(&p->pi_lock, flags);
	group_id = p->grp ? p->grp->id : 0;
	raw_spin_unlock_irqrestore(&p->pi_lock, flags);

	return group_id;
}

static int cpufreq_notifier_policy(struct notifier_block *nb,
		unsigned long val, void *data)
{
@@ -3164,6 +3368,25 @@ static void restore_orig_mark_start(struct task_struct *p, u64 mark_start)
	p->ravg.mark_start = mark_start;
}

static inline int update_preferred_cluster(struct related_thread_group *grp,
		struct task_struct *p, u32 old_load)
{
	u32 new_load = task_load(p);

	if (!grp)
		return 0;

	/*
	 * Update if task's load has changed significantly or a complete window
	 * has passed since we last updated preference
	 */
	if (abs(new_load - old_load) > sched_ravg_window / 4 ||
		sched_ktime_clock() - p->grp->last_update > sched_ravg_window)
		return 1;

	return 0;
}

#else	/* CONFIG_SCHED_HMP */

static inline void fixup_busy_time(struct task_struct *p, int new_cpu) { }
@@ -3856,8 +4079,10 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
	struct migration_notify_data mnd;
	int heavy_task = 0;
#ifdef CONFIG_SMP
	unsigned int old_load;
	struct rq *rq;
	u64 wallclock;
	struct related_thread_group *grp = NULL;
#endif

	/*
@@ -3893,12 +4118,17 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
	rq = cpu_rq(task_cpu(p));

	raw_spin_lock(&rq->lock);
	old_load = task_load(p);
	grp = task_related_thread_group(p);
	wallclock = sched_ktime_clock();
	update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
	heavy_task = heavy_task_wakeup(p, rq, TASK_WAKE);
	update_task_ravg(p, rq, TASK_WAKE, wallclock, 0);
	raw_spin_unlock(&rq->lock);

	if (update_preferred_cluster(grp, p, old_load))
		set_preferred_cluster(grp);

	p->sched_contributes_to_load = !!task_contributes_to_load(p);
	p->state = TASK_WAKING;

@@ -4799,10 +5029,14 @@ void scheduler_tick(void)
	struct task_struct *curr = rq->curr;
	u64 wallclock;
	bool early_notif;
	u32 old_load;
	struct related_thread_group *grp;

	sched_clock_tick();

	raw_spin_lock(&rq->lock);
	old_load = task_load(curr);
	grp = task_related_thread_group(curr);
	set_window_start(rq);
	update_rq_clock(rq);
	curr->sched_class->task_tick(rq, curr, 0);
@@ -4823,6 +5057,10 @@ void scheduler_tick(void)
	trigger_load_balance(rq);
#endif
	rq_last_tick_reset(rq);

	if (update_preferred_cluster(grp, curr, old_load))
		set_preferred_cluster(grp);

	if (curr->sched_class == &fair_sched_class)
		check_for_migration(rq, curr);
}