Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit b5869ce7 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
* git://git.kernel.org/pub/scm/linux/kernel/git/mingo/linux-2.6-sched: (140 commits)
  sched: sync wakeups preempt too
  sched: affine sync wakeups
  sched: guest CPU accounting: maintain guest state in KVM
  sched: guest CPU accounting: maintain stats in account_system_time()
  sched: guest CPU accounting: add guest-CPU /proc/<pid>/stat fields
  sched: guest CPU accounting: add guest-CPU /proc/stat field
  sched: domain sysctl fixes: add terminator comment
  sched: domain sysctl fixes: do not crash on allocation failure
  sched: domain sysctl fixes: unregister the sysctl table before domains
  sched: domain sysctl fixes: use for_each_online_cpu()
  sched: domain sysctl fixes: use kcalloc()
  Make scheduler debug file operations const
  sched: enable wake-idle on CONFIG_SCHED_MC=y
  sched: reintroduce topology.h tunings
  sched: allow the immediate migration of cache-cold tasks
  sched: debug, improve migration statistics
  sched: debug: increase width of debug line
  sched: activate task_hot() only on fair-scheduled tasks
  sched: reintroduce cache-hot affinity
  sched: speed up context-switches a bit
  ...
parents df3d80f5 9c63d9c0
Loading
Loading
Loading
Loading
+67 −0
Original line number Diff line number Diff line
@@ -117,3 +117,70 @@ Some implementation details:
   iterators of the scheduling modules are used. The balancing code got
   quite a bit simpler as a result.


Group scheduler extension to CFS
================================

Normally the scheduler operates on individual tasks and strives to provide
fair CPU time to each task. Sometimes, it may be desirable to group tasks
and provide fair CPU time to each such task group. For example, it may
be desirable to first provide fair CPU time to each user on the system
and then to each task belonging to a user.

CONFIG_FAIR_GROUP_SCHED strives to achieve exactly that. It lets
SCHED_NORMAL/BATCH tasks be grouped and divides CPU time fairly among such
groups. At present, there are two (mutually exclusive) mechanisms to group
tasks for CPU bandwidth control purposes:

	- Based on user id (CONFIG_FAIR_USER_SCHED)
		In this option, tasks are grouped according to their user id.
	- Based on "cgroup" pseudo filesystem (CONFIG_FAIR_CGROUP_SCHED)
		This option lets the administrator create arbitrary groups
		of tasks, using the "cgroup" pseudo filesystem. See
		Documentation/cgroups.txt for more information about this
		filesystem.

Only one of these options to group tasks can be chosen and not both.

Group scheduler tunables:

When CONFIG_FAIR_USER_SCHED is defined, a directory is created in sysfs for
each new user and a "cpu_share" file is added in that directory.

	# cd /sys/kernel/uids
	# cat 512/cpu_share		# Display user 512's CPU share
	1024
	# echo 2048 > 512/cpu_share	# Modify user 512's CPU share
	# cat 512/cpu_share		# Display user 512's CPU share
	2048
	#

CPU bandwidth between two users is divided in the ratio of their CPU shares.
For example, if you would like user "root" to get twice the bandwidth of user
"guest", then set the cpu_share for both the users such that "root"'s
cpu_share is twice "guest"'s cpu_share.


When CONFIG_FAIR_CGROUP_SCHED is defined, a "cpu.shares" file is created
for each group created using the pseudo filesystem. See example steps
below to create task groups and modify their CPU share using the "cgroup"
pseudo filesystem:

	# mkdir /dev/cpuctl
	# mount -t cgroup -ocpu none /dev/cpuctl
	# cd /dev/cpuctl

	# mkdir multimedia	# create "multimedia" group of tasks
	# mkdir browser		# create "browser" group of tasks

	# #Configure the multimedia group to receive twice the CPU bandwidth
	# #of the browser group

	# echo 2048 > multimedia/cpu.shares
	# echo 1024 > browser/cpu.shares

	# firefox &	# Launch firefox and move it to "browser" group
	# echo <firefox_pid> > browser/tasks

	# #Launch gmplayer (or your favourite movie player)
	# echo <movie_player_pid> > multimedia/tasks
+11 −0
Original line number Diff line number Diff line
@@ -214,6 +214,17 @@ config X86_ES7000

endchoice

config SCHED_NO_NO_OMIT_FRAME_POINTER
	bool "Single-depth WCHAN output"
	default y
	help
	  Calculate simpler /proc/<PID>/wchan values. If this option
	  is disabled then wchan values will recurse back to the
	  caller function. This provides more accurate wchan values,
	  at the expense of slightly more scheduling overhead.

	  If in doubt, say "Y".

config PARAVIRT
	bool "Paravirtualization support (EXPERIMENTAL)"
	depends on EXPERIMENTAL
+10 −0
Original line number Diff line number Diff line
@@ -624,6 +624,16 @@ void kvm_mmu_unload(struct kvm_vcpu *vcpu);

int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run);

/*
 * kvm_guest_enter - set PF_VCPU in the flags of the current task.
 *
 * Takes no arguments and returns nothing; the only effect is a plain
 * (non-atomic) read-modify-write of current->flags.
 *
 * NOTE(review): presumably the flag is what lets CPU-time accounting
 * attribute time to the guest -- confirm against account_system_time().
 */
static inline void kvm_guest_enter(void)
{
	current->flags = current->flags | PF_VCPU;
}

/*
 * kvm_guest_exit - clear PF_VCPU in the flags of the current task.
 *
 * Takes no arguments and returns nothing; every other bit of
 * current->flags is preserved by masking with ~PF_VCPU. The update is a
 * plain (non-atomic) read-modify-write.
 */
static inline void kvm_guest_exit(void)
{
	current->flags = current->flags & ~PF_VCPU;
}

static inline int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
				     u32 error_code)
{
+2 −0
Original line number Diff line number Diff line
@@ -2046,6 +2046,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
		kvm_x86_ops->inject_pending_vectors(vcpu, kvm_run);

	vcpu->guest_mode = 1;
	kvm_guest_enter();

	if (vcpu->requests)
		if (test_and_clear_bit(KVM_TLB_FLUSH, &vcpu->requests))
@@ -2053,6 +2054,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)

	kvm_x86_ops->run(vcpu, kvm_run);

	kvm_guest_exit();
	vcpu->guest_mode = 0;
	local_irq_enable();

+4 −5
Original line number Diff line number Diff line
@@ -45,8 +45,7 @@ void pipe_wait(struct pipe_inode_info *pipe)
	 * Pipes are system-local resources, so sleeping on them
	 * is considered a noninteractive wait:
	 */
	prepare_to_wait(&pipe->wait, &wait,
			TASK_INTERRUPTIBLE | TASK_NONINTERACTIVE);
	prepare_to_wait(&pipe->wait, &wait, TASK_INTERRUPTIBLE);
	if (pipe->inode)
		mutex_unlock(&pipe->inode->i_mutex);
	schedule();
@@ -383,7 +382,7 @@ pipe_read(struct kiocb *iocb, const struct iovec *_iov,

	/* Signal writers asynchronously that there is more room. */
	if (do_wakeup) {
		wake_up_interruptible(&pipe->wait);
		wake_up_interruptible_sync(&pipe->wait);
		kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
	}
	if (ret > 0)
@@ -556,7 +555,7 @@ pipe_write(struct kiocb *iocb, const struct iovec *_iov,
out:
	mutex_unlock(&inode->i_mutex);
	if (do_wakeup) {
		wake_up_interruptible(&pipe->wait);
		wake_up_interruptible_sync(&pipe->wait);
		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
	}
	if (ret > 0)
@@ -650,7 +649,7 @@ pipe_release(struct inode *inode, int decr, int decw)
	if (!pipe->readers && !pipe->writers) {
		free_pipe_info(inode);
	} else {
		wake_up_interruptible(&pipe->wait);
		wake_up_interruptible_sync(&pipe->wait);
		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
		kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
	}
Loading