Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit bbbfeac9 authored by Nathan Zimmer's avatar Nathan Zimmer Committed by Ingo Molnar
Browse files

sched: Fix /proc/sched_debug failure on very very large systems



On systems with 4096 cores attemping to read /proc/sched_debug
fails because we are trying to push all the data into a single
kmalloc buffer.

The issue is on these very large machines all the data will not
fit in 4mb.

A better solution is to not us the single_open mechanism but to
provide our own seq_operations and treat each cpu as an
individual record.

The output should be identical to the previous version.

Reported-by: default avatarDave Jones <davej@redhat.com>
Signed-off-by: default avatarNathan Zimmer <nzimmer@sgi.com>
Cc: Peter Zijlstra <peterz@infradead.org>)
[ Whitespace fixlet]
[ Fix spello in comment]
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
parent cb152ff2
Loading
Loading
Loading
Loading
+79 −11
Original line number Diff line number Diff line
@@ -269,11 +269,11 @@ static void print_cpu(struct seq_file *m, int cpu)
	{
		unsigned int freq = cpu_khz ? : 1;

		SEQ_printf(m, "\ncpu#%d, %u.%03u MHz\n",
		SEQ_printf(m, "cpu#%d, %u.%03u MHz\n",
			   cpu, freq / 1000, (freq % 1000));
	}
#else
	SEQ_printf(m, "\ncpu#%d\n", cpu);
	SEQ_printf(m, "cpu#%d\n", cpu);
#endif

#define P(x)								\
@@ -330,6 +330,7 @@ do { \
	print_rq(m, rq, cpu);
	rcu_read_unlock();
	spin_unlock_irqrestore(&sched_debug_lock, flags);
	SEQ_printf(m, "\n");
}

static const char *sched_tunable_scaling_names[] = {
@@ -338,11 +339,10 @@ static const char *sched_tunable_scaling_names[] = {
	"linear"
};

static int sched_debug_show(struct seq_file *m, void *v)
static void sched_debug_header(struct seq_file *m)
{
	u64 ktime, sched_clk, cpu_clk;
	unsigned long flags;
	int cpu;

	local_irq_save(flags);
	ktime = ktime_to_ns(ktime_get());
@@ -384,33 +384,101 @@ static int sched_debug_show(struct seq_file *m, void *v)
#undef PN
#undef P

	SEQ_printf(m, "  .%-40s: %d (%s)\n", "sysctl_sched_tunable_scaling",
	SEQ_printf(m, "  .%-40s: %d (%s)\n",
		"sysctl_sched_tunable_scaling",
		sysctl_sched_tunable_scaling,
		sched_tunable_scaling_names[sysctl_sched_tunable_scaling]);
	SEQ_printf(m, "\n");
}

	for_each_online_cpu(cpu)
		print_cpu(m, cpu);
static int sched_debug_show(struct seq_file *m, void *v)
{
	int cpu = (unsigned long)(v - 2);

	SEQ_printf(m, "\n");
	if (cpu != -1)
		print_cpu(m, cpu);
	else
		sched_debug_header(m);

	return 0;
}

void sysrq_sched_debug_show(void)
{
	sched_debug_show(NULL, NULL);
	int cpu;

	sched_debug_header(NULL);
	for_each_online_cpu(cpu)
		print_cpu(NULL, cpu);

}

/*
 * This itererator needs some explanation.
 * It returns 1 for the header position.
 * This means 2 is cpu 0.
 * In a hotplugged system some cpus, including cpu 0, may be missing so we have
 * to use cpumask_* to iterate over the cpus.
 */
static void *sched_debug_start(struct seq_file *file, loff_t *offset)
{
	unsigned long n = *offset;

	if (n == 0)
		return (void *) 1;

	n--;

	if (n > 0)
		n = cpumask_next(n - 1, cpu_online_mask);
	else
		n = cpumask_first(cpu_online_mask);

	*offset = n + 1;

	if (n < nr_cpu_ids)
		return (void *)(unsigned long)(n + 2);
	return NULL;
}

static void *sched_debug_next(struct seq_file *file, void *data, loff_t *offset)
{
	(*offset)++;
	return sched_debug_start(file, offset);
}

static void sched_debug_stop(struct seq_file *file, void *data)
{
}

static const struct seq_operations sched_debug_sops = {
	.start = sched_debug_start,
	.next = sched_debug_next,
	.stop = sched_debug_stop,
	.show = sched_debug_show,
};

static int sched_debug_release(struct inode *inode, struct file *file)
{
	seq_release(inode, file);

	return 0;
}

static int sched_debug_open(struct inode *inode, struct file *filp)
{
	return single_open(filp, sched_debug_show, NULL);
	int ret = 0;

	ret = seq_open(filp, &sched_debug_sops);

	return ret;
}

static const struct file_operations sched_debug_fops = {
	.open		= sched_debug_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
	.release	= sched_debug_release,
};

static int __init init_sched_debug_procfs(void)