Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit b37d1c18 authored by Mikulas Patocka, committed by Helge Deller
Browse files

parisc: Use per-pagetable spinlock



PA-RISC uses a global spinlock to protect pagetable updates in the TLB
fault handlers. When multiple cores are taking TLB faults simultaneously,
the cache line containing the spinlock becomes a bottleneck.

This patch embeds the spinlock in the top level page directory, so that
every process has its own lock. It improves performance by 30% when
doing parallel compilations.

At least on the N class systems, only one PxTLB inter-processor
broadcast can be active at any one time on the Merced bus. If a Merced
bus is found, this patch serializes the TLB flushes with the
pa_tlb_flush_lock spinlock.

v1: Initial patch by Mikulas
v2: Added Merced detection by Helge
v3: Revised TLB serialization by Dave & Helge

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: John David Anglin <dave.anglin@bell.net>
Signed-off-by: Helge Deller <deller@gmx.de>
parent d19a1290
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -120,7 +120,7 @@ extern void get_pci_node_path(struct pci_dev *dev, struct hardware_path *path);
extern void init_parisc_bus(void);
extern struct device *hwpath_to_device(struct hardware_path *modpath);
extern void device_to_hwpath(struct device *dev, struct hardware_path *path);

extern int machine_has_merced_bus(void);

/* inventory.c: */
extern void do_memory_inventory(void);
+1 −0
Original line number Diff line number Diff line
@@ -41,6 +41,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
		__pgd_val_set(*pgd, PxD_FLAG_ATTACHED);
#endif
	}
	spin_lock_init(pgd_spinlock(actual_pgd));
	return actual_pgd;
}

+54 −15
Original line number Diff line number Diff line
@@ -17,7 +17,7 @@
#include <asm/processor.h>
#include <asm/cache.h>

extern spinlock_t pa_tlb_lock;
static inline spinlock_t *pgd_spinlock(pgd_t *);

/*
 * kern_addr_valid(ADDR) tests if ADDR is pointing to valid kernel
@@ -34,16 +34,46 @@ extern spinlock_t pa_tlb_lock;
 */
#define kern_addr_valid(addr)	(1)

/* Purge data and instruction TLB entries.  Must be called holding
 * the pa_tlb_lock.  The TLB purge instructions are slow on SMP
 * machines since the purge must be broadcast to all CPUs.
/* This is for the serialization of PxTLB broadcasts. At least on the N class
 * systems, only one PxTLB inter processor broadcast can be active at any one
 * time on the Merced bus.

 * PTE updates are protected by the per-pagetable lock returned by pgd_spinlock().
 */
extern spinlock_t pa_tlb_flush_lock;
extern spinlock_t pa_swapper_pg_lock;
#if defined(CONFIG_64BIT) && defined(CONFIG_SMP)
extern int pa_serialize_tlb_flushes;
#else
#define pa_serialize_tlb_flushes        (0)
#endif

/*
 * purge_tlb_start(flags) / purge_tlb_end(flags) bracket a TLB purge sequence.
 *
 * When pa_serialize_tlb_flushes is non-zero, the pair acquires and releases
 * pa_tlb_flush_lock via spin_lock_irqsave() / spin_unlock_irqrestore().
 * Otherwise no lock is taken and the pair only saves and restores the local
 * interrupt state via local_irq_save() / local_irq_restore().
 *
 * 'flags' is a caller-provided variable that carries the saved interrupt
 * state from purge_tlb_start() to the matching purge_tlb_end(); pass the
 * same variable to both.
 */
#define purge_tlb_start(flags)  do { \
	if (pa_serialize_tlb_flushes)	\
		spin_lock_irqsave(&pa_tlb_flush_lock, flags); \
	else \
		local_irq_save(flags);	\
	} while (0)
#define purge_tlb_end(flags)	do { \
	if (pa_serialize_tlb_flushes)	\
		spin_unlock_irqrestore(&pa_tlb_flush_lock, flags); \
	else \
		local_irq_restore(flags); \
	} while (0)

/* Purge data and instruction TLB entries. The TLB purge instructions
 * are slow on SMP machines since the purge must be broadcast to all CPUs.
 *
 * @mm:   address space whose context value is handed to mtsp() before purging
 * @addr: address passed to pdtlb() and pitlb()
 *
 * The whole sequence runs between purge_tlb_start() and purge_tlb_end(),
 * so this function does its own interrupt save/restore (and, when
 * pa_serialize_tlb_flushes is set, its own pa_tlb_flush_lock locking).
 */

static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
{
	unsigned long flags;

	purge_tlb_start(flags);
	/* NOTE(review): presumably loads mm->context into space register 1
	 * for the purges below -- confirm against the mtsp() definition. */
	mtsp(mm->context, 1);
	pdtlb(addr);	/* data TLB purge for addr */
	pitlb(addr);	/* instruction TLB purge for addr */
	purge_tlb_end(flags);
}

/* Certain architectures need to do special things when PTEs
@@ -59,11 +89,11 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
	do {							\
		pte_t old_pte;					\
		unsigned long flags;				\
		spin_lock_irqsave(&pa_tlb_lock, flags);		\
		spin_lock_irqsave(pgd_spinlock((mm)->pgd), flags);\
		old_pte = *ptep;				\
		set_pte(ptep, pteval);				\
		purge_tlb_entries(mm, addr);			\
		spin_unlock_irqrestore(&pa_tlb_lock, flags);	\
		spin_unlock_irqrestore(pgd_spinlock((mm)->pgd), flags);\
	} while (0)

#endif /* !__ASSEMBLY__ */
@@ -88,10 +118,10 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
#if CONFIG_PGTABLE_LEVELS == 3
#define PGD_ORDER	1 /* Number of pages per pgd */
#define PMD_ORDER	1 /* Number of pages per pmd */
#define PGD_ALLOC_ORDER	2 /* first pgd contains pmd */
#define PGD_ALLOC_ORDER	(2 + 1) /* first pgd contains pmd */
#else
#define PGD_ORDER	1 /* Number of pages per pgd */
#define PGD_ALLOC_ORDER	PGD_ORDER
#define PGD_ALLOC_ORDER	(PGD_ORDER + 1)
#endif

/* Definitions for 3rd level (we use PLD here for Page Lower directory
@@ -459,6 +489,15 @@ extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t *);
#define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x)		((pte_t) { (x).val })


/*
 * Return the spinlock associated with a page directory.
 *
 * swapper_pg_dir uses the standalone pa_swapper_pg_lock. For any other
 * page directory the lock is located PAGE_SIZE << (PGD_ALLOC_ORDER - 1)
 * bytes past @pgd.
 */
static inline spinlock_t *pgd_spinlock(pgd_t *pgd)
{
	char *lock_addr;

	if (unlikely(pgd == swapper_pg_dir))
		return &pa_swapper_pg_lock;

	lock_addr = (char *)pgd + (PAGE_SIZE << (PGD_ALLOC_ORDER - 1));
	return (spinlock_t *)lock_addr;
}


static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
{
	pte_t pte;
@@ -467,15 +506,15 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned
	if (!pte_young(*ptep))
		return 0;

	spin_lock_irqsave(&pa_tlb_lock, flags);
	spin_lock_irqsave(pgd_spinlock(vma->vm_mm->pgd), flags);
	pte = *ptep;
	if (!pte_young(pte)) {
		spin_unlock_irqrestore(&pa_tlb_lock, flags);
		spin_unlock_irqrestore(pgd_spinlock(vma->vm_mm->pgd), flags);
		return 0;
	}
	set_pte(ptep, pte_mkold(pte));
	purge_tlb_entries(vma->vm_mm, addr);
	spin_unlock_irqrestore(&pa_tlb_lock, flags);
	spin_unlock_irqrestore(pgd_spinlock(vma->vm_mm->pgd), flags);
	return 1;
}

@@ -485,11 +524,11 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
	pte_t old_pte;
	unsigned long flags;

	spin_lock_irqsave(&pa_tlb_lock, flags);
	spin_lock_irqsave(pgd_spinlock(mm->pgd), flags);
	old_pte = *ptep;
	set_pte(ptep, __pte(0));
	purge_tlb_entries(mm, addr);
	spin_unlock_irqrestore(&pa_tlb_lock, flags);
	spin_unlock_irqrestore(pgd_spinlock(mm->pgd), flags);

	return old_pte;
}
@@ -497,10 +536,10 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
	unsigned long flags;
	spin_lock_irqsave(&pa_tlb_lock, flags);
	spin_lock_irqsave(pgd_spinlock(mm->pgd), flags);
	set_pte(ptep, pte_wrprotect(*ptep));
	purge_tlb_entries(mm, addr);
	spin_unlock_irqrestore(&pa_tlb_lock, flags);
	spin_unlock_irqrestore(pgd_spinlock(mm->pgd), flags);
}

#define pte_same(A,B)	(pte_val(A) == pte_val(B))
+1 −23
Original line number Diff line number Diff line
@@ -8,21 +8,6 @@
#include <linux/sched.h>
#include <asm/mmu_context.h>


/* This is for the serialisation of PxTLB broadcasts.  At least on the
 * N class systems, only one PxTLB inter processor broadcast can be
 * active at any one time on the Merced bus.  This tlb purge
 * synchronisation is fairly lightweight and harmless so we activate
 * it on all systems not just the N class.

 * It is also used to ensure PTE updates are atomic and consistent
 * with the TLB.
 */
extern spinlock_t pa_tlb_lock;

#define purge_tlb_start(flags)	spin_lock_irqsave(&pa_tlb_lock, flags)
#define purge_tlb_end(flags)	spin_unlock_irqrestore(&pa_tlb_lock, flags)

extern void flush_tlb_all(void);
extern void flush_tlb_all_local(void *);

@@ -79,13 +64,6 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
static inline void flush_tlb_page(struct vm_area_struct *vma,
	unsigned long addr)
{
	unsigned long flags, sid;

	sid = vma->vm_mm->context;
	purge_tlb_start(flags);
	mtsp(sid, 1);
	pdtlb(addr);
	pitlb(addr);
	purge_tlb_end(flags);
	purge_tlb_entries(vma->vm_mm, addr);
}
#endif
+11 −4
Original line number Diff line number Diff line
@@ -40,12 +40,19 @@ void purge_dcache_page_asm(unsigned long phys_addr, unsigned long vaddr);
void flush_icache_page_asm(unsigned long phys_addr, unsigned long vaddr);


/* On some machines (e.g. ones with the Merced bus), there can be
/* On some machines (i.e., ones with the Merced bus), there can be
 * only a single PxTLB broadcast at a time; this must be guaranteed
 * by software.  We put a spinlock around all TLB flushes  to
 * ensure this.
 * by software. We need a spinlock around all TLB flushes to ensure
 * this.
 */
DEFINE_SPINLOCK(pa_tlb_lock);
DEFINE_SPINLOCK(pa_tlb_flush_lock);

/* Swapper page setup lock. */
DEFINE_SPINLOCK(pa_swapper_pg_lock);

#if defined(CONFIG_64BIT) && defined(CONFIG_SMP)
int pa_serialize_tlb_flushes __read_mostly;
#endif

struct pdc_cache_info cache_info __read_mostly;
#ifndef CONFIG_PA20
Loading