Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 5091faa4 authored by Mike Galbraith's avatar Mike Galbraith Committed by Ingo Molnar
Browse files

sched: Add 'autogroup' scheduling feature: automated per session task groups



A recurring complaint from CFS users is that parallel kbuild has
a negative impact on desktop interactivity.  This patch
implements an idea from Linus, to automatically create task
groups.  Currently, only per session autogroups are implemented,
but the patch leaves the way open for enhancement.

Implementation: each task's signal struct contains an inherited
pointer to a refcounted autogroup struct containing a task group
pointer, the default for all tasks pointing to the
init_task_group.  When a task calls setsid(), a new task group
is created, the process is moved into the new task group, and a
reference to the previous task group is dropped.  Child
processes inherit this task group thereafter, and increase its
refcount.  When the last thread of a process exits, the
process's reference is dropped, such that when the last process
referencing an autogroup exits, the autogroup is destroyed.

At runqueue selection time, IFF a task has no cgroup assignment,
its current autogroup is used.

Autogroup bandwidth is controllable via setting its nice level
through the proc filesystem:

  cat /proc/<pid>/autogroup

Displays the task's group and the group's nice level.

  echo <nice level> > /proc/<pid>/autogroup

Sets the task group's shares to the weight of a nice <nice level> task.
Setting nice level is rate limited for !admin users due to the
abuse risk of task group locking.

The feature is enabled from boot by default if
CONFIG_SCHED_AUTOGROUP=y is selected, but can be disabled via
the boot option noautogroup, and can also be turned on/off on
the fly via:

  echo [01] > /proc/sys/kernel/sched_autogroup_enabled

... which will automatically move tasks to/from the root task group.

Signed-off-by: default avatarMike Galbraith <efault@gmx.de>
Acked-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
Acked-by: default avatarPeter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Markus Trippelsdorf <markus@trippelsdorf.de>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Paul Turner <pjt@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
[ Removed the task_group_path() debug code, and fixed !EVENTFD build failure. ]
Signed-off-by: default avatarIngo Molnar <mingo@elte.hu>
LKML-Reference: <1290281700.28711.9.camel@maggy.simson.net>
Signed-off-by: default avatarIngo Molnar <mingo@elte.hu>
parent 822bc180
Loading
Loading
Loading
Loading
+2 −0
Original line number Original line Diff line number Diff line
@@ -1622,6 +1622,8 @@ and is between 256 and 4096 characters. It is defined in the file
	noapic		[SMP,APIC] Tells the kernel to not make use of any
	noapic		[SMP,APIC] Tells the kernel to not make use of any
			IOAPICs that may be present in the system.
			IOAPICs that may be present in the system.


	noautogroup	Disable scheduler automatic task group creation.

	nobats		[PPC] Do not use BATs for mapping kernel lowmem
	nobats		[PPC] Do not use BATs for mapping kernel lowmem
			on "Classic" PPC cores.
			on "Classic" PPC cores.


+79 −0
Original line number Original line Diff line number Diff line
@@ -1407,6 +1407,82 @@ static const struct file_operations proc_pid_sched_operations = {


#endif
#endif


#ifdef CONFIG_SCHED_AUTOGROUP
/*
 * Print out autogroup related information:
 */
static int sched_autogroup_show(struct seq_file *m, void *v)
{
	/* m->private carries the /proc/<pid>/autogroup inode (set at open). */
	struct task_struct *task = get_proc_task(m->private);

	if (!task)
		return -ESRCH;

	proc_sched_autogroup_show_task(task, m);
	put_task_struct(task);

	return 0;
}

/*
 * Write handler for /proc/<pid>/autogroup: parses a nice level from
 * userspace and applies it to the task's autogroup.  Returns the number
 * of bytes consumed, or a negative errno.
 */
static ssize_t
sched_autogroup_write(struct file *file, const char __user *buf,
	    size_t count, loff_t *offset)
{
	struct inode *inode = file->f_path.dentry->d_inode;
	struct task_struct *p;
	char buffer[PROC_NUMBUF];
	long lnice;
	int nice;
	int err;

	memset(buffer, 0, sizeof(buffer));
	if (count > sizeof(buffer) - 1)
		count = sizeof(buffer) - 1;
	if (copy_from_user(buffer, buf, count))
		return -EFAULT;

	err = strict_strtol(strstrip(buffer), 0, &lnice);
	if (err)
		return -EINVAL;

	/*
	 * Validate the nice range before truncating long -> int; the old
	 * code funnelled the value through 'err' and would silently wrap
	 * out-of-range input.
	 */
	if (lnice < -20 || lnice > 19)
		return -EINVAL;
	nice = (int)lnice;

	p = get_proc_task(inode);
	if (!p)
		return -ESRCH;

	err = proc_sched_autogroup_set_nice(p, &nice);
	if (err)
		count = err;

	put_task_struct(p);

	return count;
}

/*
 * Open handler for /proc/<pid>/autogroup.
 *
 * single_open() already stores its third (data) argument in the
 * seq_file's ->private field, so hand the inode over directly instead
 * of patching filp->private_data after the fact.
 */
static int sched_autogroup_open(struct inode *inode, struct file *filp)
{
	return single_open(filp, sched_autogroup_show, inode);
}

/* File operations for /proc/<pid>/autogroup: read shows the group and its
 * nice level; write sets the group's shares from a nice level. */
static const struct file_operations proc_pid_sched_autogroup_operations = {
	.open		= sched_autogroup_open,
	.read		= seq_read,
	.write		= seq_write,
	.llseek		= seq_lseek,
	.release	= single_release,
};

#endif /* CONFIG_SCHED_AUTOGROUP */

static ssize_t comm_write(struct file *file, const char __user *buf,
static ssize_t comm_write(struct file *file, const char __user *buf,
				size_t count, loff_t *offset)
				size_t count, loff_t *offset)
{
{
@@ -2732,6 +2808,9 @@ static const struct pid_entry tgid_base_stuff[] = {
	INF("limits",	  S_IRUGO, proc_pid_limits),
	INF("limits",	  S_IRUGO, proc_pid_limits),
#ifdef CONFIG_SCHED_DEBUG
#ifdef CONFIG_SCHED_DEBUG
	REG("sched",      S_IRUGO|S_IWUSR, proc_pid_sched_operations),
	REG("sched",      S_IRUGO|S_IWUSR, proc_pid_sched_operations),
#endif
#ifdef CONFIG_SCHED_AUTOGROUP
	REG("autogroup",  S_IRUGO|S_IWUSR, proc_pid_sched_autogroup_operations),
#endif
#endif
	REG("comm",      S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
	REG("comm",      S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
+23 −0
Original line number Original line Diff line number Diff line
@@ -509,6 +509,8 @@ struct thread_group_cputimer {
	spinlock_t lock;
	spinlock_t lock;
};
};


struct autogroup;

/*
/*
 * NOTE! "signal_struct" does not have it's own
 * NOTE! "signal_struct" does not have it's own
 * locking, because a shared signal_struct always
 * locking, because a shared signal_struct always
@@ -576,6 +578,9 @@ struct signal_struct {


	struct tty_struct *tty; /* NULL if no tty */
	struct tty_struct *tty; /* NULL if no tty */


#ifdef CONFIG_SCHED_AUTOGROUP
	struct autogroup *autogroup;
#endif
	/*
	/*
	 * Cumulative resource counters for dead threads in the group,
	 * Cumulative resource counters for dead threads in the group,
	 * and for reaped dead child processes forked by this group.
	 * and for reaped dead child processes forked by this group.
@@ -1927,6 +1932,24 @@ int sched_rt_handler(struct ctl_table *table, int write,


extern unsigned int sysctl_sched_compat_yield;
extern unsigned int sysctl_sched_compat_yield;


#ifdef CONFIG_SCHED_AUTOGROUP
extern unsigned int sysctl_sched_autogroup_enabled;

extern void sched_autogroup_create_attach(struct task_struct *p);
extern void sched_autogroup_detach(struct task_struct *p);
extern void sched_autogroup_fork(struct signal_struct *sig);
extern void sched_autogroup_exit(struct signal_struct *sig);
#ifdef CONFIG_PROC_FS
extern void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m);
extern int proc_sched_autogroup_set_nice(struct task_struct *p, int *nice);
#endif
#else
static inline void sched_autogroup_create_attach(struct task_struct *p) { }
static inline void sched_autogroup_detach(struct task_struct *p) { }
static inline void sched_autogroup_fork(struct signal_struct *sig) { }
static inline void sched_autogroup_exit(struct signal_struct *sig) { }
#endif

#ifdef CONFIG_RT_MUTEXES
#ifdef CONFIG_RT_MUTEXES
extern int rt_mutex_getprio(struct task_struct *p);
extern int rt_mutex_getprio(struct task_struct *p);
extern void rt_mutex_setprio(struct task_struct *p, int prio);
extern void rt_mutex_setprio(struct task_struct *p, int prio);
+13 −0
Original line number Original line Diff line number Diff line
@@ -728,6 +728,19 @@ config NET_NS


endif # NAMESPACES
endif # NAMESPACES


config SCHED_AUTOGROUP
	bool "Automatic process group scheduling"
	select EVENTFD
	select CGROUPS
	select CGROUP_SCHED
	select FAIR_GROUP_SCHED
	help
	  This option optimizes the scheduler for common desktop workloads by
	  automatically creating and populating task groups.  This separation
	  of workloads isolates aggressive CPU burners (like build jobs) from
	  desktop applications.  Task group autogeneration is currently based
	  upon task session.

config MM_OWNER
config MM_OWNER
	bool
	bool


+4 −1
Original line number Original line Diff line number Diff line
@@ -174,9 +174,11 @@ static inline void free_signal_struct(struct signal_struct *sig)


static inline void put_signal_struct(struct signal_struct *sig)
static inline void put_signal_struct(struct signal_struct *sig)
{
{
	if (atomic_dec_and_test(&sig->sigcnt))
	if (atomic_dec_and_test(&sig->sigcnt)) {
		sched_autogroup_exit(sig);
		free_signal_struct(sig);
		free_signal_struct(sig);
	}
	}
}


void __put_task_struct(struct task_struct *tsk)
void __put_task_struct(struct task_struct *tsk)
{
{
@@ -904,6 +906,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
	posix_cpu_timers_init_group(sig);
	posix_cpu_timers_init_group(sig);


	tty_audit_fork(sig);
	tty_audit_fork(sig);
	sched_autogroup_fork(sig);


	sig->oom_adj = current->signal->oom_adj;
	sig->oom_adj = current->signal->oom_adj;
	sig->oom_score_adj = current->signal->oom_score_adj;
	sig->oom_score_adj = current->signal->oom_score_adj;
Loading