Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 48a1b10a authored by Yinghai Lu's avatar Yinghai Lu Committed by Ingo Molnar
Browse files

x86, sparseirq: move irq_desc according to smp_affinity, v7



Impact: improve NUMA handling by migrating irq_desc on smp_affinity changes

if CONFIG_NUMA_MIGRATE_IRQ_DESC is set:

-  make irq_desc follow the irq's affinity (i.e. move the irq_desc)
-  call move_irq_desc() in irq_complete_move()
-  legacy irq_descs are not moved, because they are allocated from a static array

For logical APIC mode, we need to add move_desc_in_progress_in_same_domain;
otherwise the irq_desc will not be moved. This also means that two phases
may be needed to get the irq_desc moved.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent 13bd41bc
Loading
Loading
Loading
Loading
+9 −0
Original line number Diff line number Diff line
@@ -248,6 +248,15 @@ config SPARSE_IRQ

	  If you don't know what to do here, say Y.

config NUMA_MIGRATE_IRQ_DESC
	bool "Move irq desc when changing irq smp_affinity"
	depends on SPARSE_IRQ && SMP
	default n
	help
	  This enables moving the irq_desc to the cpu/node that will handle the irq.

	  If you don't know what to do here, say N.

config X86_FIND_SMP_CONFIG
	def_bool y
	depends on X86_MPPARSE || X86_VOYAGER
+141 −1
Original line number Diff line number Diff line
@@ -141,6 +141,9 @@ struct irq_cfg {
	unsigned move_cleanup_count;
	u8 vector;
	u8 move_in_progress : 1;
#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
	u8 move_desc_pending : 1;
#endif
};

/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
@@ -223,6 +226,121 @@ void arch_init_chip_data(struct irq_desc *desc, int cpu)
	}
}

#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC

/*
 * Duplicate the irq_2_pin list of @old_cfg into @cfg, taking every new
 * entry from get_one_free_irq_2_pin(@cpu).
 *
 * If any allocation fails, the partially built copy is released and @cfg
 * keeps pointing at @old_cfg's list instead of being left without any
 * pins.  free_irq_2_pin() compares the two head pointers and leaves a
 * shared list alone, so the fallback list is not freed behind @cfg's back.
 */
static void
init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu)
{
	struct irq_pin_list *old_entry, *head, *tail, *entry;

	/* default and fallback: share the old list (NULL when it is empty) */
	cfg->irq_2_pin = old_cfg->irq_2_pin;

	old_entry = old_cfg->irq_2_pin;
	if (!old_entry)
		return;

	head = NULL;
	tail = NULL;
	for (; old_entry; old_entry = old_entry->next) {
		entry = get_one_free_irq_2_pin(cpu);
		if (!entry)
			goto out_free;

		entry->apic	= old_entry->apic;
		entry->pin	= old_entry->pin;
		/* keep the copy terminated at all times for the error path */
		entry->next	= NULL;

		if (tail)
			tail->next = entry;
		else
			head = entry;
		tail = entry;
	}

	/* the whole list was copied: switch over to the new one */
	cfg->irq_2_pin = head;
	return;

out_free:
	/* drop the partial copy; still use the old one */
	while (head) {
		entry = head->next;
		kfree(head);
		head = entry;
	}
}

/*
 * Release every entry of @old_cfg's irq_2_pin list with kfree() and clear
 * the head pointer.  Nothing is done when @cfg refers to the very same
 * list head.
 */
static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg)
{
	struct irq_pin_list *cur, *following;

	if (cfg->irq_2_pin == old_cfg->irq_2_pin)
		return;

	for (cur = old_cfg->irq_2_pin; cur; cur = following) {
		/* fetch the successor before the node goes away */
		following = cur->next;
		kfree(cur);
	}

	old_cfg->irq_2_pin = NULL;
}

/*
 * Give @desc its own irq_cfg, obtained from get_one_free_irq_cfg(@cpu) and
 * filled with a byte copy of @old_desc's irq_cfg; init_copy_irq_2_pin() then
 * sets up the irq_2_pin pointer of the copy.
 *
 * When no irq_cfg can be obtained the function returns without touching
 * @desc.
 */
void arch_init_copy_chip_data(struct irq_desc *old_desc,
				 struct irq_desc *desc, int cpu)
{
	struct irq_cfg *old_cfg;
	struct irq_cfg *cfg = get_one_free_irq_cfg(cpu);

	if (!cfg)
		return;

	/* install the new cfg first, then read back the source cfg */
	desc->chip_data = cfg;
	old_cfg = old_desc->chip_data;

	memcpy(cfg, old_cfg, sizeof(*cfg));
	init_copy_irq_2_pin(old_cfg, cfg, cpu);
}

/*
 * Release an irq_cfg with kfree().  Only the structure itself is freed
 * here; the irq_2_pin list it points to is not touched.
 */
static void free_irq_cfg(struct irq_cfg *old_cfg)
{
	kfree(old_cfg);
}

/*
 * Dispose of the irq_cfg attached to @old_desc: its irq_2_pin list is
 * handed to free_irq_2_pin(), the irq_cfg itself to free_irq_cfg(), and
 * @old_desc->chip_data is cleared.
 *
 * Nothing happens when @old_desc has no chip data or when both
 * descriptors point at the same irq_cfg.
 */
void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
{
	struct irq_cfg *stale = old_desc->chip_data;
	struct irq_cfg *live = desc->chip_data;

	if (!stale || stale == live)
		return;

	free_irq_2_pin(stale, live);
	free_irq_cfg(stale);
	old_desc->chip_data = NULL;
}

/*
 * Flag @desc's irq_cfg with move_desc_pending when no vector move is in
 * progress and @mask shares no cpu with the descriptor's current affinity.
 */
static void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask)
{
	struct irq_cfg *cfg = desc->chip_data;

	if (cfg->move_in_progress)
		return;

	/* it means that domain is not changed */
	if (cpus_intersects(desc->affinity, mask))
		return;

	cfg->move_desc_pending = 1;
}
#endif

#else
static struct irq_cfg *irq_cfg(unsigned int irq)
{
@@ -231,9 +349,11 @@ static struct irq_cfg *irq_cfg(unsigned int irq)

#endif

#ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC
/*
 * Empty stub used when CONFIG_NUMA_MIGRATE_IRQ_DESC is not set, so callers
 * can invoke set_extra_move_desc() without their own #ifdef.
 */
static inline void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask)
{
}
#endif

struct io_apic {
	unsigned int index;
@@ -2346,14 +2466,34 @@ static void irq_complete_move(struct irq_desc **descp)
	struct irq_cfg *cfg = desc->chip_data;
	unsigned vector, me;

	if (likely(!cfg->move_in_progress))
	if (likely(!cfg->move_in_progress)) {
#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
		if (likely(!cfg->move_desc_pending))
			return;

		/* domain is not changed, but affinity is changed */
		me = smp_processor_id();
		if (cpu_isset(me, desc->affinity)) {
			*descp = desc = move_irq_desc(desc, me);
			/* get the new one */
			cfg = desc->chip_data;
			cfg->move_desc_pending = 0;
		}
#endif
		return;
	}

	vector = ~get_irq_regs()->orig_ax;
	me = smp_processor_id();
	if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) {
		cpumask_t cleanup_mask;

#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
		*descp = desc = move_irq_desc(desc, me);
		/* get the new one */
		cfg = desc->chip_data;
#endif

		cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
		cfg->move_cleanup_count = cpus_weight(cleanup_mask);
		send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+10 −0
Original line number Diff line number Diff line
@@ -227,6 +227,16 @@ extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int cpu);

#endif

/*
 * Return the descriptor to keep using for @irq.
 *
 * Without CONFIG_NUMA_MIGRATE_IRQ_DESC the caller's @desc is handed back
 * unchanged; with it, the descriptor is looked up again through
 * irq_to_desc(@irq) and @desc is ignored.
 */
static inline struct irq_desc *
irq_remap_to_desc(unsigned int irq, struct irq_desc *desc)
{
#ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC
	return desc;
#else
	return irq_to_desc(irq);
#endif
}

/*
 * Migration helpers for obsolete names, they will go away:
 */
+1 −0
Original line number Diff line number Diff line
@@ -3,3 +3,4 @@ obj-y := handle.o manage.o spurious.o resend.o chip.o devres.o
obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o
obj-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o
obj-$(CONFIG_NUMA_MIGRATE_IRQ_DESC) += numa_migrate.o
+10 −2
Original line number Diff line number Diff line
@@ -353,6 +353,7 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)

	spin_lock(&desc->lock);
	mask_ack_irq(desc, irq);
	desc = irq_remap_to_desc(irq, desc);

	if (unlikely(desc->status & IRQ_INPROGRESS))
		goto out_unlock;
@@ -430,6 +431,7 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
	desc->status &= ~IRQ_INPROGRESS;
out:
	desc->chip->eoi(irq);
	desc = irq_remap_to_desc(irq, desc);

	spin_unlock(&desc->lock);
}
@@ -466,12 +468,14 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
		    !desc->action)) {
		desc->status |= (IRQ_PENDING | IRQ_MASKED);
		mask_ack_irq(desc, irq);
		desc = irq_remap_to_desc(irq, desc);
		goto out_unlock;
	}
	kstat_incr_irqs_this_cpu(irq, desc);

	/* Start handling the irq */
	desc->chip->ack(irq);
	desc = irq_remap_to_desc(irq, desc);

	/* Mark the IRQ currently in progress.*/
	desc->status |= IRQ_INPROGRESS;
@@ -532,8 +536,10 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc)
	if (!noirqdebug)
		note_interrupt(irq, desc, action_ret);

	if (desc->chip->eoi)
	if (desc->chip->eoi) {
		desc->chip->eoi(irq);
		desc = irq_remap_to_desc(irq, desc);
	}
}

void
@@ -568,8 +574,10 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,

	/* Uninstall? */
	if (handle == handle_bad_irq) {
		if (desc->chip != &no_irq_chip)
		if (desc->chip != &no_irq_chip) {
			mask_ack_irq(desc, irq);
			desc = irq_remap_to_desc(irq, desc);
		}
		desc->status |= IRQ_DISABLED;
		desc->depth = 1;
	}
Loading