Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit ca94c442 authored by Lennart Poettering, committed by Ingo Molnar
Browse files

sched: Introduce SCHED_RESET_ON_FORK scheduling policy flag



This patch introduces a new flag SCHED_RESET_ON_FORK which can be passed
to the kernel via sched_setscheduler(), ORed into the policy parameter. If
set, this will make sure that when the process forks a) the scheduling
priority is reset to DEFAULT_PRIO if it was higher and b) the scheduling
policy is reset to SCHED_NORMAL if it was either SCHED_FIFO or SCHED_RR.

Why have this?

Currently, if a process is real-time scheduled this will 'leak' to all
its child processes. For security reasons it is often (always?) a good
idea to make sure that if a process acquires RT scheduling this is
confined to this process and only this process. More specifically this
makes the per-process resource limit RLIMIT_RTTIME useful for security
purposes, because it makes it impossible to use a fork bomb to
circumvent the per-process RLIMIT_RTTIME accounting.

This feature is also useful for tools like 'renice' which can then
change the nice level of a process without having this spill to all its
child processes.

Why expose this via sched_setscheduler() and not other syscalls such as
prctl() or sched_setparam()?

prctl() does not take a pid parameter. Because of that, it would be
impossible to modify this flag for processes other than the current one.

The struct passed to sched_setparam() can unfortunately not be extended
without breaking compatibility, since sched_setparam() lacks a size
parameter.

How to use this from userspace? In your RT program simply replace this:

  sched_setscheduler(pid, SCHED_FIFO, &param);

by this:

  sched_setscheduler(pid, SCHED_FIFO|SCHED_RESET_ON_FORK, &param);

Signed-off-by: Lennart Poettering <lennart@poettering.net>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20090615152714.GA29092@tango.0pointer.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent 45e3e193
Loading
Loading
Loading
Loading
+6 −0
Original line number Original line Diff line number Diff line
@@ -38,6 +38,8 @@
#define SCHED_BATCH		3
#define SCHED_BATCH		3
/* SCHED_ISO: reserved but not implemented yet */
/* SCHED_ISO: reserved but not implemented yet */
#define SCHED_IDLE		5
#define SCHED_IDLE		5
/*
 * Flag that can be ORed into the policy argument of sched_setscheduler().
 * When it is set on a task, a child created by fork starts with
 * SCHED_NORMAL if the parent's policy was SCHED_FIFO or SCHED_RR, and with
 * DEFAULT_PRIO if the parent's normal priority was higher than that. The
 * child's own copy of the flag is cleared. sched_getscheduler() reports
 * the flag ORed into the returned policy value.
 */
#define SCHED_RESET_ON_FORK     0x40000000


#ifdef __KERNEL__
#ifdef __KERNEL__


@@ -1209,6 +1211,10 @@ struct task_struct {
	unsigned did_exec:1;
	unsigned did_exec:1;
	unsigned in_execve:1;	/* Tell the LSMs that the process is doing an
	unsigned in_execve:1;	/* Tell the LSMs that the process is doing an
				 * execve */
				 * execve */

	/* Revert to default priority/policy when forking */
	unsigned sched_reset_on_fork:1;

	pid_t pid;
	pid_t pid;
	pid_t tgid;
	pid_t tgid;


+40 −9
Original line number Original line Diff line number Diff line
@@ -2613,12 +2613,28 @@ void sched_fork(struct task_struct *p, int clone_flags)
	set_task_cpu(p, cpu);
	set_task_cpu(p, cpu);


	/*
	/*
	 * Make sure we do not leak PI boosting priority to the child:
	 * Revert to default priority/policy on fork if requested. Make sure we
	 * do not leak PI boosting priority to the child.
	 */
	 */
	if (current->sched_reset_on_fork &&
			(p->policy == SCHED_FIFO || p->policy == SCHED_RR))
		p->policy = SCHED_NORMAL;

	if (current->sched_reset_on_fork &&
			(current->normal_prio < DEFAULT_PRIO))
		p->prio = DEFAULT_PRIO;
	else
		p->prio = current->normal_prio;
		p->prio = current->normal_prio;

	if (!rt_prio(p->prio))
	if (!rt_prio(p->prio))
		p->sched_class = &fair_sched_class;
		p->sched_class = &fair_sched_class;


	/*
	 * We don't need the reset flag anymore after the fork. It has
	 * fulfilled its duty:
	 */
	p->sched_reset_on_fork = 0;

#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
	if (likely(sched_info_on()))
	if (likely(sched_info_on()))
		memset(&p->sched_info, 0, sizeof(p->sched_info));
		memset(&p->sched_info, 0, sizeof(p->sched_info));
@@ -6094,17 +6110,25 @@ static int __sched_setscheduler(struct task_struct *p, int policy,
	unsigned long flags;
	unsigned long flags;
	const struct sched_class *prev_class = p->sched_class;
	const struct sched_class *prev_class = p->sched_class;
	struct rq *rq;
	struct rq *rq;
	int reset_on_fork;


	/* may grab non-irq protected spin_locks */
	/* may grab non-irq protected spin_locks */
	BUG_ON(in_interrupt());
	BUG_ON(in_interrupt());
recheck:
recheck:
	/* double check policy once rq lock held */
	/* double check policy once rq lock held */
	if (policy < 0)
	if (policy < 0) {
		reset_on_fork = p->sched_reset_on_fork;
		policy = oldpolicy = p->policy;
		policy = oldpolicy = p->policy;
	else if (policy != SCHED_FIFO && policy != SCHED_RR &&
	} else {
		reset_on_fork = !!(policy & SCHED_RESET_ON_FORK);
		policy &= ~SCHED_RESET_ON_FORK;

		if (policy != SCHED_FIFO && policy != SCHED_RR &&
				policy != SCHED_NORMAL && policy != SCHED_BATCH &&
				policy != SCHED_NORMAL && policy != SCHED_BATCH &&
				policy != SCHED_IDLE)
				policy != SCHED_IDLE)
			return -EINVAL;
			return -EINVAL;
	}

	/*
	/*
	 * Valid priorities for SCHED_FIFO and SCHED_RR are
	 * Valid priorities for SCHED_FIFO and SCHED_RR are
	 * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL,
	 * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL,
@@ -6148,6 +6172,10 @@ recheck:
		/* can't change other user's priorities */
		/* can't change other user's priorities */
		if (!check_same_owner(p))
		if (!check_same_owner(p))
			return -EPERM;
			return -EPERM;

		/* Normal users shall not reset the sched_reset_on_fork flag */
		if (p->sched_reset_on_fork && !reset_on_fork)
			return -EPERM;
	}
	}


	if (user) {
	if (user) {
@@ -6191,6 +6219,8 @@ recheck:
	if (running)
	if (running)
		p->sched_class->put_prev_task(rq, p);
		p->sched_class->put_prev_task(rq, p);


	p->sched_reset_on_fork = reset_on_fork;

	oldprio = p->prio;
	oldprio = p->prio;
	__setscheduler(rq, p, policy, param->sched_priority);
	__setscheduler(rq, p, policy, param->sched_priority);


@@ -6307,14 +6337,15 @@ SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid)
	if (p) {
	if (p) {
		retval = security_task_getscheduler(p);
		retval = security_task_getscheduler(p);
		if (!retval)
		if (!retval)
			retval = p->policy;
			retval = p->policy
				| (p->sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0);
	}
	}
	read_unlock(&tasklist_lock);
	read_unlock(&tasklist_lock);
	return retval;
	return retval;
}
}


/**
/**
 * sys_sched_getscheduler - get the RT priority of a thread
 * sys_sched_getparam - get the RT priority of a thread
 * @pid: the pid in question.
 * @pid: the pid in question.
 * @param: structure containing the RT priority.
 * @param: structure containing the RT priority.
 */
 */