Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 1f5a5b87 authored by Thomas Gleixner's avatar Thomas Gleixner
Browse files

genirq: Implement a sane sparse_irq allocator



The current sparse_irq allocator has several short comings due to
failures in the design or the lack of it:

 - Requires iteration over the number of active irqs to find a free slot
   (Some architectures have grown their own workarounds for this)
 - Removal of entries is not possible
 - Racy between create_irq_nr and destroy_irq (plugged by horrible
   callbacks)
 - Migration of active irq descriptors is not possible
 - No bulk allocation of irq ranges
 - Sprinkeled irq_desc references all over the place outside of kernel/irq/
   (The previous chip functions series is addressing this issue)

Implement a sane allocator which fixes the above short comings (though
migration of active descriptors needs a full tree wide cleanup of the
direct and mostly unlocked access to irq_desc).

The new allocator still uses a radix_tree, but uses a bitmap for
keeping track of allocated irq numbers. That allows:

 - Fast lookup of a free slot
 - Allows the removal of descriptors
 - Prevents the create/destroy race
 - Bulk allocation of consecutive irq ranges
 - Basic design is ready for migration of life descriptors after
   further cleanups

The bitmap is also used in the SPARSE_IRQ=n case for lookup and
raceless (de)allocation of irq numbers. So it removes the requirement
for looping through the descriptor array to find slots.

Right now it uses sparse_irq_lock to protect the bitmap and the radix
tree, but after cleaning up all users we should be able convert that
to a mutex and to switch the radix_tree and decriptor allocations to
GFP_KERNEL.

[ Folded in a bugfix from Yinghai Lu ]

Signed-off-by: default avatarThomas Gleixner <tglx@linutronix.de>
Reviewed-by: default avatarIngo Molnar <mingo@elte.hu>
parent 1318a481
Loading
Loading
Loading
Loading
+23 −0
Original line number Diff line number Diff line
@@ -398,6 +398,29 @@ static inline struct irq_2_iommu *irq_data_get_iommu(struct irq_data *d)
}
#endif

int irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node);
void irq_free_descs(unsigned int irq, unsigned int cnt);

static inline int irq_alloc_desc(int node)
{
	return irq_alloc_descs(-1, 0, 1, node);
}

static inline int irq_alloc_desc_at(unsigned int at, int node)
{
	return irq_alloc_descs(at, at, 1, node);
}

static inline int irq_alloc_desc_from(unsigned int from, int node)
{
	return irq_alloc_descs(-1, from, 1, node);
}

static inline void irq_free_desc(unsigned int irq)
{
	irq_free_descs(irq, 1);
}

#endif /* CONFIG_GENERIC_HARDIRQS */

#endif /* !CONFIG_S390 */
+223 −8
Original line number Diff line number Diff line
@@ -13,6 +13,7 @@
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
#include <linux/radix-tree.h>
#include <linux/bitmap.h>

#include "internals.h"

@@ -33,9 +34,54 @@ static void __init init_irq_default_affinity(void)
}
#endif

#ifdef CONFIG_SMP
static int alloc_masks(struct irq_desc *desc, gfp_t gfp, int node)
{
	if (!zalloc_cpumask_var_node(&desc->irq_data.affinity, gfp, node))
		return -ENOMEM;

#ifdef CONFIG_GENERIC_PENDING_IRQ
	if (!zalloc_cpumask_var_node(&desc->pending_mask, gfp, node)) {
		free_cpumask_var(desc->irq_data.affinity);
		return -ENOMEM;
	}
#endif
	return 0;
}

static void desc_smp_init(struct irq_desc *desc, int node)
{
	desc->node = node;
	cpumask_copy(desc->irq_data.affinity, irq_default_affinity);
}

#else
static inline int
alloc_masks(struct irq_desc *desc, gfp_t gfp, int node) { return 0; }
static inline void desc_smp_init(struct irq_desc *desc, int node) { }
#endif

static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node)
{
	desc->irq_data.irq = irq;
	desc->irq_data.chip = &no_irq_chip;
	desc->irq_data.chip_data = NULL;
	desc->irq_data.handler_data = NULL;
	desc->irq_data.msi_desc = NULL;
	desc->status = IRQ_DEFAULT_INIT_FLAGS;
	desc->handle_irq = handle_bad_irq;
	desc->depth = 1;
	desc->name = NULL;
	memset(desc->kstat_irqs, 0, nr_cpu_ids * sizeof(*(desc->kstat_irqs)));
	desc_smp_init(desc, node);
}

int nr_irqs = NR_IRQS;
EXPORT_SYMBOL_GPL(nr_irqs);

DEFINE_RAW_SPINLOCK(sparse_irq_lock);
static DECLARE_BITMAP(allocated_irqs, NR_IRQS);

#ifdef CONFIG_SPARSE_IRQ

static struct irq_desc irq_desc_init = {
@@ -85,14 +131,9 @@ static void init_one_irq_desc(int irq, struct irq_desc *desc, int node)
	arch_init_chip_data(desc, node);
}

/*
 * Protect the sparse_irqs:
 */
DEFINE_RAW_SPINLOCK(sparse_irq_lock);

static RADIX_TREE(irq_desc_tree, GFP_ATOMIC);

static void set_irq_desc(unsigned int irq, struct irq_desc *desc)
static void irq_insert_desc(unsigned int irq, struct irq_desc *desc)
{
	radix_tree_insert(&irq_desc_tree, irq, desc);
}
@@ -111,6 +152,94 @@ void replace_irq_desc(unsigned int irq, struct irq_desc *desc)
		radix_tree_replace_slot(ptr, desc);
}

static void delete_irq_desc(unsigned int irq)
{
	radix_tree_delete(&irq_desc_tree, irq);
}

#ifdef CONFIG_SMP
static void free_masks(struct irq_desc *desc)
{
#ifdef CONFIG_GENERIC_PENDING_IRQ
	free_cpumask_var(desc->pending_mask);
#endif
	free_cpumask_var(desc->affinity);
}
#else
static inline void free_masks(struct irq_desc *desc) { }
#endif

static struct irq_desc *alloc_desc(int irq, int node)
{
	struct irq_desc *desc;
	gfp_t gfp = GFP_KERNEL;

	desc = kzalloc_node(sizeof(*desc), gfp, node);
	if (!desc)
		return NULL;
	/* allocate based on nr_cpu_ids */
	desc->kstat_irqs = kzalloc_node(nr_cpu_ids * sizeof(*desc->kstat_irqs),
					 gfp, node);
	if (!desc->kstat_irqs)
		goto err_desc;

	if (alloc_masks(desc, gfp, node))
		goto err_kstat;

	raw_spin_lock_init(&desc->lock);
	lockdep_set_class(&desc->lock, &irq_desc_lock_class);

	desc_set_defaults(irq, desc, node);

	return desc;

err_kstat:
	kfree(desc->kstat_irqs);
err_desc:
	kfree(desc);
	return NULL;
}

static void free_desc(unsigned int irq)
{
	struct irq_desc *desc = irq_to_desc(irq);
	unsigned long flags;

	raw_spin_lock_irqsave(&sparse_irq_lock, flags);
	delete_irq_desc(irq);
	raw_spin_unlock_irqrestore(&sparse_irq_lock, flags);

	free_masks(desc);
	kfree(desc->kstat_irqs);
	kfree(desc);
}

static int alloc_descs(unsigned int start, unsigned int cnt, int node)
{
	struct irq_desc *desc;
	unsigned long flags;
	int i;

	for (i = 0; i < cnt; i++) {
		desc = alloc_desc(start + i, node);
		if (!desc)
			goto err;
		raw_spin_lock_irqsave(&sparse_irq_lock, flags);
		irq_insert_desc(start + i, desc);
		raw_spin_unlock_irqrestore(&sparse_irq_lock, flags);
	}
	return start;

err:
	for (i--; i >= 0; i--)
		free_desc(start + i);

	raw_spin_lock_irqsave(&sparse_irq_lock, flags);
	bitmap_clear(allocated_irqs, start, cnt);
	raw_spin_unlock_irqrestore(&sparse_irq_lock, flags);
	return -ENOMEM;
}

static struct irq_desc irq_desc_legacy[NR_IRQS_LEGACY] __cacheline_aligned_in_smp = {
	[0 ... NR_IRQS_LEGACY-1] = {
		.status		= IRQ_DEFAULT_INIT_FLAGS,
@@ -155,7 +284,7 @@ int __init early_irq_init(void)
		lockdep_set_class(&desc[i].lock, &irq_desc_lock_class);
		alloc_desc_masks(&desc[i], node, true);
		init_desc_masks(&desc[i]);
		set_irq_desc(i, &desc[i]);
		irq_insert_desc(i, &desc[i]);
	}

	return arch_early_irq_init();
@@ -192,7 +321,7 @@ struct irq_desc * __ref irq_to_desc_alloc_node(unsigned int irq, int node)
	}
	init_one_irq_desc(irq, desc, node);

	set_irq_desc(irq, desc);
	irq_insert_desc(irq, desc);

out_unlock:
	raw_spin_unlock_irqrestore(&sparse_irq_lock, flags);
@@ -245,8 +374,94 @@ struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node)
{
	return irq_to_desc(irq);
}

#ifdef CONFIG_SMP
static inline int desc_node(struct irq_desc *desc)
{
	return desc->irq_data.node;
}
#else
static inline int desc_node(struct irq_desc *desc) { return 0; }
#endif

static void free_desc(unsigned int irq)
{
	struct irq_desc *desc = irq_to_desc(irq);
	unsigned long flags;

	raw_spin_lock_irqsave(&desc->lock, flags);
	desc_set_defaults(irq, desc, desc_node(desc));
	raw_spin_unlock_irqrestore(&desc->lock, flags);
}

static inline int alloc_descs(unsigned int start, unsigned int cnt, int node)
{
	return start;
}
#endif /* !CONFIG_SPARSE_IRQ */

/* Dynamic interrupt handling */

/**
 * irq_free_descs - free irq descriptors
 * @from:	Start of descriptor range
 * @cnt:	Number of consecutive irqs to free
 */
void irq_free_descs(unsigned int from, unsigned int cnt)
{
	unsigned long flags;
	int i;

	if (from >= nr_irqs || (from + cnt) > nr_irqs)
		return;

	for (i = 0; i < cnt; i++)
		free_desc(from + i);

	raw_spin_lock_irqsave(&sparse_irq_lock, flags);
	bitmap_clear(allocated_irqs, from, cnt);
	raw_spin_unlock_irqrestore(&sparse_irq_lock, flags);
}

/**
 * irq_alloc_descs - allocate and initialize a range of irq descriptors
 * @irq:	Allocate for specific irq number if irq >= 0
 * @from:	Start the search from this irq number
 * @cnt:	Number of consecutive irqs to allocate.
 * @node:	Preferred node on which the irq descriptor should be allocated
 *
 * Returns the first irq number or error code
 */
int __ref
irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node)
{
	unsigned long flags;
	int start, ret;

	if (!cnt)
		return -EINVAL;

	raw_spin_lock_irqsave(&sparse_irq_lock, flags);

	start = bitmap_find_next_zero_area(allocated_irqs, nr_irqs, from, cnt, 0);
	ret = -EEXIST;
	if (irq >=0 && start != irq)
		goto err;

	ret = -ENOMEM;
	if (start >= nr_irqs)
		goto err;

	bitmap_set(allocated_irqs, start, cnt);
	raw_spin_unlock_irqrestore(&sparse_irq_lock, flags);
	return alloc_descs(start, cnt, node);

err:
	raw_spin_unlock_irqrestore(&sparse_irq_lock, flags);
	return ret;
}

/* Statistics access */
void clear_kstat_irqs(struct irq_desc *desc)
{
	memset(desc->kstat_irqs, 0, nr_cpu_ids * sizeof(*(desc->kstat_irqs)));