Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 1b61b910 authored by Zhang Yanmin, committed by Linus Torvalds
Browse files

[PATCH] x86: kernel irq balance doesn't work



On i386, kernel irq balance doesn't work.

1) In function do_irq_balance, after the kernel finds the min_loaded cpu but
   before it calls set_pending_irq to actually pin the selected_irq to the
   target cpu, the kernel does a cpus_and with irq_affinity[selected_irq].
   Later on, when the irq is acked, the kernel calls
   move_native_irq=>desc->handler->set_affinity to change the irq affinity.
   However, every function pointed to by
   hw_interrupt_type->set_affinity(unsigned int irq, cpumask_t cpumask)
   always changes irq_affinity[irq] to cpumask.  The next time
   do_irq_balance is called, it does the cpus_and again with
   irq_affinity[selected_irq], but irq_affinity[selected_irq] has already
   been narrowed to the single cpu selected by the first irq balance, so
   the irq can never be moved to a different cpu.

2) Function balance_irq in file arch/i386/kernel/io_apic.c has the same
   issue.

[akpm@osdl.org: cleanups]
Signed-off-by: Zhang Yanmin <yanmin.zhang@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent afa024c3
Loading
Loading
Loading
Loading
+20 −11
Original line number Diff line number Diff line
@@ -275,10 +275,15 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
#  define Dprintk(x...) 
# endif


#define IRQBALANCE_CHECK_ARCH -999
static int irqbalance_disabled = IRQBALANCE_CHECK_ARCH;
static int physical_balance = 0;
#define MAX_BALANCED_IRQ_INTERVAL	(5*HZ)
#define MIN_BALANCED_IRQ_INTERVAL	(HZ/2)
#define BALANCED_IRQ_MORE_DELTA		(HZ/10)
#define BALANCED_IRQ_LESS_DELTA		(HZ)

static int irqbalance_disabled __read_mostly = IRQBALANCE_CHECK_ARCH;
static int physical_balance __read_mostly;
static long balanced_irq_interval __read_mostly = MAX_BALANCED_IRQ_INTERVAL;

static struct irq_cpu_info {
	unsigned long * last_irq;
@@ -297,12 +302,14 @@ static struct irq_cpu_info {

#define CPU_TO_PACKAGEINDEX(i) (first_cpu(cpu_sibling_map[i]))

#define MAX_BALANCED_IRQ_INTERVAL	(5*HZ)
#define MIN_BALANCED_IRQ_INTERVAL	(HZ/2)
#define BALANCED_IRQ_MORE_DELTA		(HZ/10)
#define BALANCED_IRQ_LESS_DELTA		(HZ)
/*
 * Per-IRQ CPU mask consulted by the IRQ balancer, indexed by IRQ number.
 * Every entry starts out as CPU_MASK_ALL; set_balance_irq_affinity()
 * overwrites an entry.
 */
static cpumask_t balance_irq_affinity[NR_IRQS] = {
	[0 ... NR_IRQS-1] = CPU_MASK_ALL
};

static long balanced_irq_interval = MAX_BALANCED_IRQ_INTERVAL;
/*
 * Record @mask as the set of CPUs the IRQ balancer may pick from for @irq.
 *
 * The mask is stored in balance_irq_affinity[], which balance_irq() and
 * do_irq_balance() AND with cpu_online_map when choosing a target CPU.
 * @irq is used as an array index without a range check here, so the
 * caller must pass a value below NR_IRQS.
 */
void set_balance_irq_affinity(unsigned int irq, cpumask_t mask)
{
	balance_irq_affinity[irq] = mask;
}

static unsigned long move(int curr_cpu, cpumask_t allowed_mask,
			unsigned long now, int direction)
@@ -340,7 +347,7 @@ static inline void balance_irq(int cpu, int irq)
	if (irqbalance_disabled)
		return; 

	cpus_and(allowed_mask, cpu_online_map, irq_affinity[irq]);
	cpus_and(allowed_mask, cpu_online_map, balance_irq_affinity[irq]);
	new_cpu = move(cpu, allowed_mask, now, 1);
	if (cpu != new_cpu) {
		set_pending_irq(irq, cpumask_of_cpu(new_cpu));
@@ -529,7 +536,9 @@ static void do_irq_balance(void)
		}
	}

	cpus_and(allowed_mask, cpu_online_map, irq_affinity[selected_irq]);
	cpus_and(allowed_mask,
		cpu_online_map,
		balance_irq_affinity[selected_irq]);
	target_cpu_mask = cpumask_of_cpu(min_loaded);
	cpus_and(tmp, target_cpu_mask, allowed_mask);

+8 −0
Original line number Diff line number Diff line
@@ -164,6 +164,14 @@ static inline void set_irq_info(int irq, cpumask_t mask)

#endif // CONFIG_SMP

/*
 * Hook for telling the IRQ balancer which CPUs it may use for an IRQ.
 * With CONFIG_IRQBALANCE the function is defined out of line elsewhere;
 * without it, calls compile to an empty inline stub.
 */
#ifdef CONFIG_IRQBALANCE
extern void set_balance_irq_affinity(unsigned int irq, cpumask_t mask);
#else
static inline void set_balance_irq_affinity(unsigned int irq, cpumask_t mask)
{
}
#endif

extern int no_irq_affinity;
extern int noirqdebug_setup(char *str);

+3 −0
Original line number Diff line number Diff line
@@ -24,6 +24,8 @@ static struct proc_dir_entry *smp_affinity_entry[NR_IRQS];
#ifdef CONFIG_GENERIC_PENDING_IRQ
void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val)
{
	set_balance_irq_affinity(irq, mask_val);

	/*
	 * Save these away for later use. Re-program when the
	 * interrupt is pending
@@ -33,6 +35,7 @@ void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val)
#else
/*
 * Apply a new affinity mask to @irq (this appears to be the variant built
 * when CONFIG_GENERIC_PENDING_IRQ is not set).
 *
 * The mask is first handed to set_balance_irq_affinity(), then stored in
 * irq_affinity[], and finally passed straight to the handler's
 * set_affinity() callback.
 */
void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val)
{
	set_balance_irq_affinity(irq, mask_val);
	irq_affinity[irq] = mask_val;
	irq_desc[irq].handler->set_affinity(irq, mask_val);
}