Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 8b8e8c1b authored by Yinghai Lu's avatar Yinghai Lu Committed by Ingo Molnar
Browse files

x86: remove irqbalance in kernel for 32 bit



This has been deprecated for years, the user space irqbalanced utility
works better with numa, has configurable policies, etc...

Signed-off-by: default avatarYinghai Lu <yhlu.kernel@gmai.com>
Signed-off-by: default avatarIngo Molnar <mingo@elte.hu>
parent 6d50bc26
Loading
Loading
Loading
Loading
+0 −8
Original line number Diff line number Diff line
@@ -1254,14 +1254,6 @@ config EFI
  	resultant kernel should continue to boot on existing non-EFI
  	platforms.

config IRQBALANCE
	def_bool y
	prompt "Enable kernel irq balancing"
	depends on X86_32 && SMP && X86_IO_APIC
	help
	  The default yes will allow the kernel to do irq load balancing.
	  Saying no will keep the kernel from doing irq load balancing.

config SECCOMP
	def_bool y
	prompt "Enable seccomp to safely compute untrusted bytecode"
+0 −1
Original line number Diff line number Diff line
@@ -287,7 +287,6 @@ CONFIG_MTRR=y
# CONFIG_MTRR_SANITIZER is not set
CONFIG_X86_PAT=y
CONFIG_EFI=y
# CONFIG_IRQBALANCE is not set
CONFIG_SECCOMP=y
# CONFIG_HZ_100 is not set
# CONFIG_HZ_250 is not set
+0 −402
Original line number Diff line number Diff line
@@ -371,408 +371,6 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
	spin_unlock_irqrestore(&ioapic_lock, flags);
}

#if defined(CONFIG_IRQBALANCE)
# include <asm/processor.h>	/* kernel_thread() */
# include <linux/kernel_stat.h>	/* kstat */
# include <linux/slab.h>		/* kmalloc() */
# include <linux/timer.h>

#define IRQBALANCE_CHECK_ARCH -999
#define MAX_BALANCED_IRQ_INTERVAL	(5*HZ)
#define MIN_BALANCED_IRQ_INTERVAL	(HZ/2)
#define BALANCED_IRQ_MORE_DELTA		(HZ/10)
#define BALANCED_IRQ_LESS_DELTA		(HZ)

static int irqbalance_disabled __read_mostly = IRQBALANCE_CHECK_ARCH;
static int physical_balance __read_mostly;
static long balanced_irq_interval __read_mostly = MAX_BALANCED_IRQ_INTERVAL;

static struct irq_cpu_info {
	unsigned long *last_irq;
	unsigned long *irq_delta;
	unsigned long irq;
} irq_cpu_data[NR_CPUS];

#define CPU_IRQ(cpu)		(irq_cpu_data[cpu].irq)
#define LAST_CPU_IRQ(cpu, irq)   (irq_cpu_data[cpu].last_irq[irq])
#define IRQ_DELTA(cpu, irq) 	(irq_cpu_data[cpu].irq_delta[irq])

#define IDLE_ENOUGH(cpu,now) \
	(idle_cpu(cpu) && ((now) - per_cpu(irq_stat, (cpu)).idle_timestamp > 1))

#define IRQ_ALLOWED(cpu, allowed_mask)	cpu_isset(cpu, allowed_mask)

#define CPU_TO_PACKAGEINDEX(i) (first_cpu(per_cpu(cpu_sibling_map, i)))

static cpumask_t balance_irq_affinity_init __initdata = CPU_MASK_ALL;

static cpumask_t *balance_irq_affinity;


static void __init irq_affinity_init_work(void *data)
{
	struct dyn_array *da = data;

	int i;
	struct  balance_irq_affinity *affinity;

	affinity = *da->name;

	for (i = 0; i < *da->nr; i++)
		memcpy(&affinity[i], &balance_irq_affinity_init,
			 sizeof(struct balance_irq_affinity));

}

DEFINE_DYN_ARRAY(balance_irq_affinity, sizeof(struct balance_irq_affinity), nr_irqs, PAGE_SIZE, irq_affinity_init_work);


void set_balance_irq_affinity(unsigned int irq, cpumask_t mask)
{
	balance_irq_affinity[irq] = mask;
}

static unsigned long move(int curr_cpu, cpumask_t allowed_mask,
			unsigned long now, int direction)
{
	int search_idle = 1;
	int cpu = curr_cpu;

	goto inside;

	do {
		if (unlikely(cpu == curr_cpu))
			search_idle = 0;
inside:
		if (direction == 1) {
			cpu++;
			if (cpu >= NR_CPUS)
				cpu = 0;
		} else {
			cpu--;
			if (cpu == -1)
				cpu = NR_CPUS-1;
		}
	} while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu, allowed_mask) ||
			(search_idle && !IDLE_ENOUGH(cpu, now)));

	return cpu;
}

static inline void balance_irq(int cpu, int irq)
{
	unsigned long now = jiffies;
	cpumask_t allowed_mask;
	unsigned int new_cpu;

	if (irqbalance_disabled)
		return;

	cpus_and(allowed_mask, cpu_online_map, balance_irq_affinity[irq]);
	new_cpu = move(cpu, allowed_mask, now, 1);
	if (cpu != new_cpu)
		set_pending_irq(irq, cpumask_of_cpu(new_cpu));
}

static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold)
{
	int i, j;
	struct irq_desc *desc;

	for_each_online_cpu(i) {
		for (j = 0; j < nr_irqs; j++) {
			desc = irq_to_desc(j);
			if (!desc->action)
				continue;
			/* Is it a significant load ?  */
			if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i), j) <
						useful_load_threshold)
				continue;
			balance_irq(i, j);
		}
	}
	balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
		balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
	return;
}

static void do_irq_balance(void)
{
	int i, j;
	unsigned long max_cpu_irq = 0, min_cpu_irq = (~0);
	unsigned long move_this_load = 0;
	int max_loaded = 0, min_loaded = 0;
	int load;
	unsigned long useful_load_threshold = balanced_irq_interval + 10;
	int selected_irq;
	int tmp_loaded, first_attempt = 1;
	unsigned long tmp_cpu_irq;
	unsigned long imbalance = 0;
	cpumask_t allowed_mask, target_cpu_mask, tmp;
	struct irq_desc *desc;

	for_each_possible_cpu(i) {
		int package_index;
		CPU_IRQ(i) = 0;
		if (!cpu_online(i))
			continue;
		package_index = CPU_TO_PACKAGEINDEX(i);
		for (j = 0; j < nr_irqs; j++) {
			unsigned long value_now, delta;
			/* Is this an active IRQ or balancing disabled ? */
			desc = irq_to_desc(j);
			if (!desc->action || irq_balancing_disabled(j))
				continue;
			if (package_index == i)
				IRQ_DELTA(package_index, j) = 0;
			/* Determine the total count per processor per IRQ */
			value_now = (unsigned long) kstat_irqs_cpu(j, i);

			/* Determine the activity per processor per IRQ */
			delta = value_now - LAST_CPU_IRQ(i, j);

			/* Update last_cpu_irq[][] for the next time */
			LAST_CPU_IRQ(i, j) = value_now;

			/* Ignore IRQs whose rate is less than the clock */
			if (delta < useful_load_threshold)
				continue;
			/* update the load for the processor or package total */
			IRQ_DELTA(package_index, j) += delta;

			/* Keep track of the higher numbered sibling as well */
			if (i != package_index)
				CPU_IRQ(i) += delta;
			/*
			 * We have sibling A and sibling B in the package
			 *
			 * cpu_irq[A] = load for cpu A + load for cpu B
			 * cpu_irq[B] = load for cpu B
			 */
			CPU_IRQ(package_index) += delta;
		}
	}
	/* Find the least loaded processor package */
	for_each_online_cpu(i) {
		if (i != CPU_TO_PACKAGEINDEX(i))
			continue;
		if (min_cpu_irq > CPU_IRQ(i)) {
			min_cpu_irq = CPU_IRQ(i);
			min_loaded = i;
		}
	}
	max_cpu_irq = ULONG_MAX;

tryanothercpu:
	/*
	 * Look for heaviest loaded processor.
	 * We may come back to get the next heaviest loaded processor.
	 * Skip processors with trivial loads.
	 */
	tmp_cpu_irq = 0;
	tmp_loaded = -1;
	for_each_online_cpu(i) {
		if (i != CPU_TO_PACKAGEINDEX(i))
			continue;
		if (max_cpu_irq <= CPU_IRQ(i))
			continue;
		if (tmp_cpu_irq < CPU_IRQ(i)) {
			tmp_cpu_irq = CPU_IRQ(i);
			tmp_loaded = i;
		}
	}

	if (tmp_loaded == -1) {
	 /*
	  * In the case of small number of heavy interrupt sources,
	  * loading some of the cpus too much. We use Ingo's original
	  * approach to rotate them around.
	  */
		if (!first_attempt && imbalance >= useful_load_threshold) {
			rotate_irqs_among_cpus(useful_load_threshold);
			return;
		}
		goto not_worth_the_effort;
	}

	first_attempt = 0;		/* heaviest search */
	max_cpu_irq = tmp_cpu_irq;	/* load */
	max_loaded = tmp_loaded;	/* processor */
	imbalance = (max_cpu_irq - min_cpu_irq) / 2;

	/*
	 * if imbalance is less than approx 10% of max load, then
	 * observe diminishing returns action. - quit
	 */
	if (imbalance < (max_cpu_irq >> 3))
		goto not_worth_the_effort;

tryanotherirq:
	/* if we select an IRQ to move that can't go where we want, then
	 * see if there is another one to try.
	 */
	move_this_load = 0;
	selected_irq = -1;
	for (j = 0; j < nr_irqs; j++) {
		/* Is this an active IRQ? */
		desc = irq_to_desc(j);
		if (!desc->action)
			continue;
		if (imbalance <= IRQ_DELTA(max_loaded, j))
			continue;
		/* Try to find the IRQ that is closest to the imbalance
		 * without going over.
		 */
		if (move_this_load < IRQ_DELTA(max_loaded, j)) {
			move_this_load = IRQ_DELTA(max_loaded, j);
			selected_irq = j;
		}
	}
	if (selected_irq == -1)
		goto tryanothercpu;

	imbalance = move_this_load;

	/* For physical_balance case, we accumulated both load
	 * values in the one of the siblings cpu_irq[],
	 * to use the same code for physical and logical processors
	 * as much as possible.
	 *
	 * NOTE: the cpu_irq[] array holds the sum of the load for
	 * sibling A and sibling B in the slot for the lowest numbered
	 * sibling (A), _AND_ the load for sibling B in the slot for
	 * the higher numbered sibling.
	 *
	 * We seek the least loaded sibling by making the comparison
	 * (A+B)/2 vs B
	 */
	load = CPU_IRQ(min_loaded) >> 1;
	for_each_cpu_mask(j, per_cpu(cpu_sibling_map, min_loaded)) {
		if (load > CPU_IRQ(j)) {
			/* This won't change cpu_sibling_map[min_loaded] */
			load = CPU_IRQ(j);
			min_loaded = j;
		}
	}

	cpus_and(allowed_mask,
		cpu_online_map,
		balance_irq_affinity[selected_irq]);
	target_cpu_mask = cpumask_of_cpu(min_loaded);
	cpus_and(tmp, target_cpu_mask, allowed_mask);

	if (!cpus_empty(tmp)) {
		/* mark for change destination */
		set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded));

		/* Since we made a change, come back sooner to
		 * check for more variation.
		 */
		balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
			balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
		return;
	}
	goto tryanotherirq;

not_worth_the_effort:
	/*
	 * if we did not find an IRQ to move, then adjust the time interval
	 * upward
	 */
	balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL,
		balanced_irq_interval + BALANCED_IRQ_MORE_DELTA);
	return;
}

static int balanced_irq(void *unused)
{
	int i;
	unsigned long prev_balance_time = jiffies;
	long time_remaining = balanced_irq_interval;
	struct irq_desc *desc;

	/* push everything to CPU 0 to give us a starting point.  */
	for (i = 0 ; i < nr_irqs ; i++) {
		desc = irq_to_desc(i);
		desc->pending_mask = cpumask_of_cpu(0);
		set_pending_irq(i, cpumask_of_cpu(0));
	}

	set_freezable();
	for ( ; ; ) {
		time_remaining = schedule_timeout_interruptible(time_remaining);
		try_to_freeze();
		if (time_after(jiffies,
				prev_balance_time+balanced_irq_interval)) {
			preempt_disable();
			do_irq_balance();
			prev_balance_time = jiffies;
			time_remaining = balanced_irq_interval;
			preempt_enable();
		}
	}
	return 0;
}

static int __init balanced_irq_init(void)
{
	int i;
	struct cpuinfo_x86 *c;
	cpumask_t tmp;

	cpus_shift_right(tmp, cpu_online_map, 2);
	c = &boot_cpu_data;
	/* When not overwritten by the command line ask subarchitecture. */
	if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH)
		irqbalance_disabled = NO_BALANCE_IRQ;
	if (irqbalance_disabled)
		return 0;

	 /* disable irqbalance completely if there is only one processor online */
	if (num_online_cpus() < 2) {
		irqbalance_disabled = 1;
		return 0;
	}
	/*
	 * Enable physical balance only if more than 1 physical processor
	 * is present
	 */
	if (smp_num_siblings > 1 && !cpus_empty(tmp))
		physical_balance = 1;

	for_each_online_cpu(i) {
		irq_cpu_data[i].irq_delta = kzalloc(sizeof(unsigned long) * nr_irqs, GFP_KERNEL);
		irq_cpu_data[i].last_irq = kzalloc(sizeof(unsigned long) * nr_irqs, GFP_KERNEL);
		if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) {
			printk(KERN_ERR "balanced_irq_init: out of memory");
			goto failed;
		}
	}

	printk(KERN_INFO "Starting balanced_irq\n");
	if (!IS_ERR(kthread_run(balanced_irq, NULL, "kirqd")))
		return 0;
	printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq");
failed:
	for_each_possible_cpu(i) {
		kfree(irq_cpu_data[i].irq_delta);
		irq_cpu_data[i].irq_delta = NULL;
		kfree(irq_cpu_data[i].last_irq);
		irq_cpu_data[i].last_irq = NULL;
	}
	return 0;
}

int __devinit irqbalance_disable(char *str)
{
	irqbalance_disabled = 1;
	return 1;
}

__setup("noirqbalance", irqbalance_disable);

late_initcall(balanced_irq_init);
#endif /* CONFIG_IRQBALANCE */
#endif /* CONFIG_SMP */

#ifndef CONFIG_SMP
+0 −3
Original line number Diff line number Diff line
@@ -35,9 +35,6 @@ static void __devinit quirk_intel_irqbalance(struct pci_dev *dev)
	if (!(word & (1 << 13))) {
		dev_info(&dev->dev, "Intel E7520/7320/7525 detected; "
			"disabling irq balancing and affinity\n");
#ifdef CONFIG_IRQBALANCE
		irqbalance_disable("");
#endif
		noirqdebug_setup("");
#ifdef CONFIG_PROC_FS
		no_irq_affinity = 1;
+3 −11
Original line number Diff line number Diff line
@@ -185,7 +185,7 @@ struct irq_desc {
	cpumask_t		affinity;
	unsigned int		cpu;
#endif
#if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE)
#ifdef CONFIG_GENERIC_PENDING_IRQ
	cpumask_t		pending_mask;
#endif
#ifdef CONFIG_PROC_FS
@@ -241,13 +241,13 @@ extern int setup_irq(unsigned int irq, struct irqaction *new);

#ifdef CONFIG_SMP

#if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE)
#ifdef CONFIG_GENERIC_PENDING_IRQ

void set_pending_irq(unsigned int irq, cpumask_t mask);
void move_native_irq(int irq);
void move_masked_irq(int irq);

#else /* CONFIG_GENERIC_PENDING_IRQ || CONFIG_IRQBALANCE */
#else /* CONFIG_GENERIC_PENDING_IRQ */

static inline void move_irq(int irq)
{
@@ -274,14 +274,6 @@ static inline void set_pending_irq(unsigned int irq, cpumask_t mask)

#endif /* CONFIG_SMP */

#ifdef CONFIG_IRQBALANCE
extern void set_balance_irq_affinity(unsigned int irq, cpumask_t mask);
#else
static inline void set_balance_irq_affinity(unsigned int irq, cpumask_t mask)
{
}
#endif

extern int no_irq_affinity;

static inline int irq_balancing_disabled(unsigned int irq)
Loading