Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit d0797b39 authored by Linus Torvalds
Browse files
* git://git.kernel.org/pub/scm/linux/kernel/git/mingo/linux-2.6-sched:
  sched: tweak the sched_runtime_limit tunable
  sched: skip updating rq's next_balance under null SD
  sched: fix broken SMT/MC optimizations
  sched: accounting regression since rc1
  sched: fix sysctl directory permissions
  sched: sched_clock_idle_[sleep|wakeup]_event()
parents 0542170d 505c0efd
Loading
Loading
Loading
Loading
+0 −1
Original line number Diff line number Diff line
@@ -292,7 +292,6 @@ static struct clocksource clocksource_tsc = {

void mark_tsc_unstable(char *reason)
{
	sched_clock_unstable_event();
	if (!tsc_unstable) {
		tsc_unstable = 1;
		tsc_enabled = 0;
+25 −7
Original line number Diff line number Diff line
@@ -63,6 +63,7 @@
ACPI_MODULE_NAME("processor_idle");
#define ACPI_PROCESSOR_FILE_POWER	"power"
/*
 * Convert microseconds to PM timer ticks. The argument is parenthesized
 * so that compound expressions (e.g. "a + b") are scaled as a whole.
 */
#define US_TO_PM_TIMER_TICKS(t)		(((t) * (PM_TIMER_FREQUENCY/1000)) / 1000)
#define PM_TIMER_TICK_NS		(1000000000ULL/PM_TIMER_FREQUENCY)
#define C2_OVERHEAD			4	/* 1us (3.579 ticks per us) */
#define C3_OVERHEAD			4	/* 1us (3.579 ticks per us) */
static void (*pm_idle_save) (void) __read_mostly;
@@ -462,6 +463,9 @@ static void acpi_processor_idle(void)
		 * TBD: Can't get time duration while in C1, as resumes
		 *      go to an ISR rather than here.  Need to instrument
		 *      base interrupt handler.
		 *
		 * Note: the TSC better not stop in C1, sched_clock() will
		 *       skew otherwise.
		 */
		sleep_ticks = 0xFFFFFFFF;
		break;
@@ -469,6 +473,8 @@ static void acpi_processor_idle(void)
	case ACPI_STATE_C2:
		/* Get start time (ticks) */
		t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
		/* Tell the scheduler that we are going deep-idle: */
		sched_clock_idle_sleep_event();
		/* Invoke C2 */
		acpi_state_timer_broadcast(pr, cx, 1);
		acpi_cstate_enter(cx);
@@ -479,17 +485,22 @@ static void acpi_processor_idle(void)
		/* TSC halts in C2, so notify users */
		mark_tsc_unstable("possible TSC halt in C2");
#endif
		/* Compute time (ticks) that we were actually asleep */
		sleep_ticks = ticks_elapsed(t1, t2);

		/* Tell the scheduler how much we idled: */
		sched_clock_idle_wakeup_event(sleep_ticks*PM_TIMER_TICK_NS);

		/* Re-enable interrupts */
		local_irq_enable();
		/* Do not account our idle-switching overhead: */
		sleep_ticks -= cx->latency_ticks + C2_OVERHEAD;

		current_thread_info()->status |= TS_POLLING;
		/* Compute time (ticks) that we were actually asleep */
		sleep_ticks =
		    ticks_elapsed(t1, t2) - cx->latency_ticks - C2_OVERHEAD;
		acpi_state_timer_broadcast(pr, cx, 0);
		break;

	case ACPI_STATE_C3:

		/*
		 * disable bus master
		 * bm_check implies we need ARB_DIS
@@ -518,6 +529,8 @@ static void acpi_processor_idle(void)
		t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
		/* Invoke C3 */
		acpi_state_timer_broadcast(pr, cx, 1);
		/* Tell the scheduler that we are going deep-idle: */
		sched_clock_idle_sleep_event();
		acpi_cstate_enter(cx);
		/* Get end time (ticks) */
		t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);
@@ -531,12 +544,17 @@ static void acpi_processor_idle(void)
		/* TSC halts in C3, so notify users */
		mark_tsc_unstable("TSC halts in C3");
#endif
		/* Compute time (ticks) that we were actually asleep */
		sleep_ticks = ticks_elapsed(t1, t2);
		/* Tell the scheduler how much we idled: */
		sched_clock_idle_wakeup_event(sleep_ticks*PM_TIMER_TICK_NS);

		/* Re-enable interrupts */
		local_irq_enable();
		/* Do not account our idle-switching overhead: */
		sleep_ticks -= cx->latency_ticks + C3_OVERHEAD;

		current_thread_info()->status |= TS_POLLING;
		/* Compute time (ticks) that we were actually asleep */
		sleep_ticks =
		    ticks_elapsed(t1, t2) - cx->latency_ticks - C3_OVERHEAD;
		acpi_state_timer_broadcast(pr, cx, 0);
		break;

+29 −15
Original line number Diff line number Diff line
@@ -320,7 +320,21 @@ int proc_pid_status(struct task_struct *task, char *buffer)
	return buffer - orig;
}

static clock_t task_utime(struct task_struct *p)
/*
 * Use precise platform statistics if available:
 */
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
/* Return the task's user time exactly as stored in p->utime. */
static cputime_t task_utime(struct task_struct *p)
{
	return p->utime;
}

/* Return the task's system time exactly as stored in p->stime. */
static cputime_t task_stime(struct task_struct *p)
{
	return p->stime;
}
#else
static cputime_t task_utime(struct task_struct *p)
{
	clock_t utime = cputime_to_clock_t(p->utime),
		total = utime + cputime_to_clock_t(p->stime);
@@ -337,10 +351,10 @@ static clock_t task_utime(struct task_struct *p)
	}
	utime = (clock_t)temp;

	return utime;
	return clock_t_to_cputime(utime);
}

static clock_t task_stime(struct task_struct *p)
static cputime_t task_stime(struct task_struct *p)
{
	clock_t stime;

@@ -349,10 +363,12 @@ static clock_t task_stime(struct task_struct *p)
	 * the total, to make sure the total observed by userspace
	 * grows monotonically - apps rely on that):
	 */
	stime = nsec_to_clock_t(p->se.sum_exec_runtime) - task_utime(p);
	stime = nsec_to_clock_t(p->se.sum_exec_runtime) -
			cputime_to_clock_t(task_utime(p));

	return stime;
	return clock_t_to_cputime(stime);
}
#endif

static int do_task_stat(struct task_struct *task, char *buffer, int whole)
{
@@ -368,8 +384,7 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole)
	unsigned long long start_time;
	unsigned long cmin_flt = 0, cmaj_flt = 0;
	unsigned long  min_flt = 0,  maj_flt = 0;
	cputime_t cutime, cstime;
	clock_t utime, stime;
	cputime_t cutime, cstime, utime, stime;
	unsigned long rsslim = 0;
	char tcomm[sizeof(task->comm)];
	unsigned long flags;
@@ -387,8 +402,7 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole)

	sigemptyset(&sigign);
	sigemptyset(&sigcatch);
	cutime = cstime = cputime_zero;
	utime = stime = 0;
	cutime = cstime = utime = stime = cputime_zero;

	rcu_read_lock();
	if (lock_task_sighand(task, &flags)) {
@@ -414,15 +428,15 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole)
			do {
				min_flt += t->min_flt;
				maj_flt += t->maj_flt;
				utime += task_utime(t);
				stime += task_stime(t);
				utime = cputime_add(utime, task_utime(t));
				stime = cputime_add(stime, task_stime(t));
				t = next_thread(t);
			} while (t != task);

			min_flt += sig->min_flt;
			maj_flt += sig->maj_flt;
			utime += cputime_to_clock_t(sig->utime);
			stime += cputime_to_clock_t(sig->stime);
			utime = cputime_add(utime, sig->utime);
			stime = cputime_add(stime, sig->stime);
		}

		sid = signal_session(sig);
@@ -471,8 +485,8 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole)
		cmin_flt,
		maj_flt,
		cmaj_flt,
		utime,
		stime,
		cputime_to_clock_t(utime),
		cputime_to_clock_t(stime),
		cputime_to_clock_t(cutime),
		cputime_to_clock_t(cstime),
		priority,
+3 −2
Original line number Diff line number Diff line
@@ -681,7 +681,7 @@ enum cpu_idle_type {
#define SCHED_LOAD_SHIFT	10
#define SCHED_LOAD_SCALE	(1L << SCHED_LOAD_SHIFT)

#define SCHED_LOAD_SCALE_FUZZ	(SCHED_LOAD_SCALE >> 1)
#define SCHED_LOAD_SCALE_FUZZ	SCHED_LOAD_SCALE

#ifdef CONFIG_SMP
#define SD_LOAD_BALANCE		1	/* Do load balancing on this domain. */
@@ -1388,7 +1388,8 @@ extern void sched_exec(void);
#define sched_exec()   {}
#endif

extern void sched_clock_unstable_event(void);
extern void sched_clock_idle_sleep_event(void);
extern void sched_clock_idle_wakeup_event(u64 delta_ns);

#ifdef CONFIG_HOTPLUG_CPU
extern void idle_task_exit(void);
+51 −17
Original line number Diff line number Diff line
@@ -262,7 +262,8 @@ struct rq {
	s64 clock_max_delta;

	unsigned int clock_warps, clock_overflows;
	unsigned int clock_unstable_events;
	u64 idle_clock;
	unsigned int clock_deep_idle_events;
	u64 tick_timestamp;

	atomic_t nr_iowait;
@@ -556,18 +557,40 @@ static inline struct rq *this_rq_lock(void)
}

/*
 * CPU frequency is/was unstable - start new by setting prev_clock_raw:
 * We are going deep-idle (irqs are disabled):
 */
void sched_clock_unstable_event(void)
void sched_clock_idle_sleep_event(void)
{
	unsigned long flags;
	struct rq *rq;
	struct rq *rq = cpu_rq(smp_processor_id());

	rq = task_rq_lock(current, &flags);
	rq->prev_clock_raw = sched_clock();
	rq->clock_unstable_events++;
	task_rq_unlock(rq, &flags);
	spin_lock(&rq->lock);
	__update_rq_clock(rq);
	spin_unlock(&rq->lock);
	rq->clock_deep_idle_events++;
}
EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event);

/*
 * We just idled delta nanoseconds (called with irqs disabled):
 *
 * @delta_ns: idle duration in nanoseconds as supplied by the caller;
 *            it is added to both rq->idle_clock and rq->clock of the
 *            current CPU's runqueue.
 */
void sched_clock_idle_wakeup_event(u64 delta_ns)
{
	struct rq *rq = cpu_rq(smp_processor_id());
	/* Sample sched_clock() before taking rq->lock: */
	u64 now = sched_clock();

	/*
	 * NOTE(review): idle_clock is updated without holding rq->lock -
	 * presumably fine because irqs are off on this CPU; confirm.
	 */
	rq->idle_clock += delta_ns;
	/*
	 * Override the previous timestamp and ignore all
	 * sched_clock() deltas that occurred while we idled,
	 * and use the PM-provided delta_ns to advance the
	 * rq clock:
	 */
	spin_lock(&rq->lock);
	rq->prev_clock_raw = now;
	rq->clock += delta_ns;
	spin_unlock(&rq->lock);
}
EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);

/*
 * resched_task - mark a task 'to be rescheduled now'.
@@ -2494,7 +2517,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
	 * a think about bumping its value to force at least one task to be
	 * moved
	 */
	if (*imbalance + SCHED_LOAD_SCALE_FUZZ < busiest_load_per_task/2) {
	if (*imbalance + SCHED_LOAD_SCALE_FUZZ < busiest_load_per_task) {
		unsigned long tmp, pwr_now, pwr_move;
		unsigned int imbn;

@@ -3020,6 +3043,7 @@ static inline void rebalance_domains(int cpu, enum cpu_idle_type idle)
	struct sched_domain *sd;
	/* Earliest time when we have to do rebalance again */
	unsigned long next_balance = jiffies + 60*HZ;
	int update_next_balance = 0;

	for_each_domain(cpu, sd) {
		if (!(sd->flags & SD_LOAD_BALANCE))
@@ -3056,8 +3080,10 @@ static inline void rebalance_domains(int cpu, enum cpu_idle_type idle)
		if (sd->flags & SD_SERIALIZE)
			spin_unlock(&balancing);
out:
		if (time_after(next_balance, sd->last_balance + interval))
		if (time_after(next_balance, sd->last_balance + interval)) {
			next_balance = sd->last_balance + interval;
			update_next_balance = 1;
		}

		/*
		 * Stop the load balance at this level. There is another
@@ -3067,6 +3093,13 @@ static inline void rebalance_domains(int cpu, enum cpu_idle_type idle)
		if (!balance)
			break;
	}

	/*
	 * next_balance is updated only when there is a need. For example,
	 * when the CPU is attached to a null domain, it is not updated.
	 */
	if (likely(update_next_balance))
		rq->next_balance = next_balance;
}

@@ -4890,7 +4923,7 @@ static inline void sched_init_granularity(void)
	if (sysctl_sched_granularity > gran_limit)
		sysctl_sched_granularity = gran_limit;

	sysctl_sched_runtime_limit = sysctl_sched_granularity * 4;
	sysctl_sched_runtime_limit = sysctl_sched_granularity * 8;
	sysctl_sched_wakeup_granularity = sysctl_sched_granularity / 2;
}

@@ -5234,15 +5267,16 @@ static void migrate_dead_tasks(unsigned int dead_cpu)
static struct ctl_table sd_ctl_dir[] = {
	{
		.procname	= "sched_domain",
		.mode		= 0755,
		.mode		= 0555,
	},
	{0,},
};

static struct ctl_table sd_ctl_root[] = {
	{
		.ctl_name	= CTL_KERN,
		.procname	= "kernel",
		.mode		= 0755,
		.mode		= 0555,
		.child		= sd_ctl_dir,
	},
	{0,},
@@ -5318,7 +5352,7 @@ static ctl_table *sd_alloc_ctl_cpu_table(int cpu)
	for_each_domain(cpu, sd) {
		snprintf(buf, 32, "domain%d", i);
		entry->procname = kstrdup(buf, GFP_KERNEL);
		entry->mode = 0755;
		entry->mode = 0555;
		entry->child = sd_alloc_ctl_domain_table(sd);
		entry++;
		i++;
@@ -5338,7 +5372,7 @@ static void init_sched_domain_sysctl(void)
	for (i = 0; i < cpu_num; i++, entry++) {
		snprintf(buf, 32, "cpu%d", i);
		entry->procname = kstrdup(buf, GFP_KERNEL);
		entry->mode = 0755;
		entry->mode = 0555;
		entry->child = sd_alloc_ctl_cpu_table(i);
	}
	sd_sysctl_header = register_sysctl_table(sd_ctl_root);
Loading