Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 4b060420 authored by Mike Travis's avatar Mike Travis Committed by Linus Torvalds
Browse files

bitmap, irq: add smp_affinity_list interface to /proc/irq



Manually adjusting the smp_affinity for IRQ's becomes unwieldy when the
cpu count is large.

Setting smp affinity to cpus 256 to 263 would be:

	echo 000000ff,00000000,00000000,00000000,00000000,00000000,00000000,00000000 > smp_affinity

instead of:

	echo 256-263 > smp_affinity_list

Think about what it looks like for cpus around say, 4088 to 4095.

We already have many alternate "list" interfaces:

/sys/devices/system/cpu/cpuX/indexY/shared_cpu_list
/sys/devices/system/cpu/cpuX/topology/thread_siblings_list
/sys/devices/system/cpu/cpuX/topology/core_siblings_list
/sys/devices/system/node/nodeX/cpulist
/sys/devices/pci***/***/local_cpulist

Add a companion interface, smp_affinity_list to use cpu lists instead of
cpu maps.  This conforms to other companion interfaces where both a map
and a list interface exists.

This required adding a bitmap_parselist_user() function in a manner
similar to the bitmap_parse_user() function.

[akpm@linux-foundation.org: make __bitmap_parselist() static]
Signed-off-by: default avatarMike Travis <travis@sgi.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Jack Steiner <steiner@sgi.com>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent e50c1f60
Loading
Loading
Loading
Loading
+13 −4
Original line number Diff line number Diff line
@@ -4,10 +4,11 @@ ChangeLog:

SMP IRQ affinity

/proc/irq/IRQ#/smp_affinity specifies which target CPUs are permitted
for a given IRQ source. It's a bitmask of allowed CPUs. It's not allowed
to turn off all CPUs, and if an IRQ controller does not support IRQ
affinity then the value will not change from the default 0xffffffff.
/proc/irq/IRQ#/smp_affinity and /proc/irq/IRQ#/smp_affinity_list specify
which target CPUs are permitted for a given IRQ source.  It's a bitmask
(smp_affinity) or cpu list (smp_affinity_list) of allowed CPUs.  It's not
allowed to turn off all CPUs, and if an IRQ controller does not support
IRQ affinity then the value will not change from the default of all cpus.

/proc/irq/default_smp_affinity specifies default affinity mask that applies
to all non-active IRQs. Once IRQ is allocated/activated its affinity bitmask
@@ -54,3 +55,11 @@ round-trip min/avg/max = 0.1/0.5/585.4 ms
This time around IRQ44 was delivered only to the last four processors.
i.e counters for the CPU0-3 did not change.

Here is an example of limiting that same irq (44) to cpus 1024 to 1031:

[root@moon 44]# echo 1024-1031 > smp_affinity
[root@moon 44]# cat smp_affinity
1024-1031

Note that to do this with a bitmask would require 32 bitmasks of zero
to follow the pertinent one.
+9 −2
Original line number Diff line number Diff line
@@ -574,6 +574,12 @@ The contents of each smp_affinity file is the same by default:
  > cat /proc/irq/0/smp_affinity
  ffffffff

There is an alternate interface, smp_affinity_list which allows specifying
a cpu range instead of a bitmask:

  > cat /proc/irq/0/smp_affinity_list
  1024-1031

The default_smp_affinity mask applies to all non-active IRQs, which are the
IRQs which have not yet been allocated/activated, and hence which lack a
/proc/irq/[0-9]* directory.
@@ -583,12 +589,13 @@ reports itself as being attached. This hardware locality information does not
include information about any possible driver locality preference.

prof_cpu_mask specifies which CPUs are to be profiled by the system wide
profiler. Default value is ffffffff (all cpus).
profiler. Default value is ffffffff (all cpus if there are only 32 of them).

The way IRQs are routed is handled by the IO-APIC, and it's Round Robin
between all the CPUs which are allowed to handle it. As usual the kernel has
more info than you and does a better job than you, so the defaults are the
best choice for almost everyone.
best choice for almost everyone.  [Note this applies only to those IO-APIC's
that support "Round Robin" interrupt distribution.]

There are  three  more  important subdirectories in /proc: net, scsi, and sys.
The general  rule  is  that  the  contents,  or  even  the  existence of these
+4 −1
Original line number Diff line number Diff line
@@ -55,7 +55,8 @@
 * bitmap_parse(buf, buflen, dst, nbits)	Parse bitmap dst from kernel buf
 * bitmap_parse_user(ubuf, ulen, dst, nbits)	Parse bitmap dst from user buf
 * bitmap_scnlistprintf(buf, len, src, nbits)	Print bitmap src as list to buf
 * bitmap_parselist(buf, dst, nbits)		Parse bitmap dst from list
 * bitmap_parselist(buf, dst, nbits)		Parse bitmap dst from kernel buf
 * bitmap_parselist_user(buf, dst, nbits)	Parse bitmap dst from user buf
 * bitmap_find_free_region(bitmap, bits, order)	Find and allocate bit region
 * bitmap_release_region(bitmap, pos, order)	Free specified bit region
 * bitmap_allocate_region(bitmap, pos, order)	Allocate specified bit region
@@ -129,6 +130,8 @@ extern int bitmap_scnlistprintf(char *buf, unsigned int len,
			const unsigned long *src, int nbits);
extern int bitmap_parselist(const char *buf, unsigned long *maskp,
			int nmaskbits);
extern int bitmap_parselist_user(const char __user *ubuf, unsigned int ulen,
			unsigned long *dst, int nbits);
extern void bitmap_remap(unsigned long *dst, const unsigned long *src,
		const unsigned long *old, const unsigned long *new, int bits);
extern int bitmap_bitremap(int oldbit,
+15 −0
Original line number Diff line number Diff line
@@ -546,6 +546,21 @@ static inline int cpumask_parse_user(const char __user *buf, int len,
	return bitmap_parse_user(buf, len, cpumask_bits(dstp), nr_cpumask_bits);
}

/**
 * cpumask_parselist_user - extract a cpumask from a user string
 * @buf: the buffer to extract from
 * @len: the length of the buffer
 * @dstp: the cpumask to set.
 *
 * Returns -errno, or 0 for success.
 */
static inline int cpumask_parselist_user(const char __user *buf, int len,
				     struct cpumask *dstp)
{
	return bitmap_parselist_user(buf, len, cpumask_bits(dstp),
							nr_cpumask_bits);
}

/**
 * cpulist_scnprintf - print a cpumask into a string as comma-separated list
 * @buf: the buffer to sprintf into
+50 −4
Original line number Diff line number Diff line
@@ -19,7 +19,7 @@ static struct proc_dir_entry *root_irq_dir;

#ifdef CONFIG_SMP

static int irq_affinity_proc_show(struct seq_file *m, void *v)
static int show_irq_affinity(int type, struct seq_file *m, void *v)
{
	struct irq_desc *desc = irq_to_desc((long)m->private);
	const struct cpumask *mask = desc->irq_data.affinity;
@@ -28,6 +28,9 @@ static int irq_affinity_proc_show(struct seq_file *m, void *v)
	if (irqd_is_setaffinity_pending(&desc->irq_data))
		mask = desc->pending_mask;
#endif
	if (type)
		seq_cpumask_list(m, mask);
	else
		seq_cpumask(m, mask);
	seq_putc(m, '\n');
	return 0;
@@ -59,7 +62,18 @@ static int irq_affinity_hint_proc_show(struct seq_file *m, void *v)
#endif

int no_irq_affinity;
static ssize_t irq_affinity_proc_write(struct file *file,
static int irq_affinity_proc_show(struct seq_file *m, void *v)
{
	return show_irq_affinity(0, m, v);
}

static int irq_affinity_list_proc_show(struct seq_file *m, void *v)
{
	return show_irq_affinity(1, m, v);
}


static ssize_t write_irq_affinity(int type, struct file *file,
		const char __user *buffer, size_t count, loff_t *pos)
{
	unsigned int irq = (int)(long)PDE(file->f_path.dentry->d_inode)->data;
@@ -72,6 +86,9 @@ static ssize_t irq_affinity_proc_write(struct file *file,
	if (!alloc_cpumask_var(&new_value, GFP_KERNEL))
		return -ENOMEM;

	if (type)
		err = cpumask_parselist_user(buffer, count, new_value);
	else
		err = cpumask_parse_user(buffer, count, new_value);
	if (err)
		goto free_cpumask;
@@ -100,11 +117,28 @@ static ssize_t irq_affinity_proc_write(struct file *file,
	return err;
}

static ssize_t irq_affinity_proc_write(struct file *file,
		const char __user *buffer, size_t count, loff_t *pos)
{
	return write_irq_affinity(0, file, buffer, count, pos);
}

static ssize_t irq_affinity_list_proc_write(struct file *file,
		const char __user *buffer, size_t count, loff_t *pos)
{
	return write_irq_affinity(1, file, buffer, count, pos);
}

static int irq_affinity_proc_open(struct inode *inode, struct file *file)
{
	return single_open(file, irq_affinity_proc_show, PDE(inode)->data);
}

static int irq_affinity_list_proc_open(struct inode *inode, struct file *file)
{
	return single_open(file, irq_affinity_list_proc_show, PDE(inode)->data);
}

static int irq_affinity_hint_proc_open(struct inode *inode, struct file *file)
{
	return single_open(file, irq_affinity_hint_proc_show, PDE(inode)->data);
@@ -125,6 +159,14 @@ static const struct file_operations irq_affinity_hint_proc_fops = {
	.release	= single_release,
};

static const struct file_operations irq_affinity_list_proc_fops = {
	.open		= irq_affinity_list_proc_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
	.write		= irq_affinity_list_proc_write,
};

static int default_affinity_show(struct seq_file *m, void *v)
{
	seq_cpumask(m, irq_default_affinity);
@@ -289,6 +331,10 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc)
	proc_create_data("affinity_hint", 0400, desc->dir,
			 &irq_affinity_hint_proc_fops, (void *)(long)irq);

	/* create /proc/irq/<irq>/smp_affinity_list */
	proc_create_data("smp_affinity_list", 0600, desc->dir,
			 &irq_affinity_list_proc_fops, (void *)(long)irq);

	proc_create_data("node", 0444, desc->dir,
			 &irq_node_proc_fops, (void *)(long)irq);
#endif
Loading