
Commit 9b3a53ab authored by Stuart Menefy, committed by Paul Mundt

sh: TLB miss fast-path optimizations.



Handle simple TLB miss faults which can be resolved completely
from the page table in assembler.

Signed-off-by: Stuart Menefy <stuart.menefy@st.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
parent 9daa0c25
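
In outline, the new tlb_miss handler below does in assembler what the C helper __do_page_fault() (removed in the last file of this patch) used to do for the easy cases: walk the two-level page table for the faulting address and, if the PTE is present, program the MMU registers and issue ldtlb without ever entering the generic fault code. The following is only a rough C sketch of that walk; the accessors read_mmu_tea(), read_mmu_ttb(), write_mmu_ptel() and mmu_ldtlb() are hypothetical stand-ins for the raw MMU_TEA/MMU_TTB/MMU_PTEL accesses and the ldtlb instruction, not code from the patch.

/*
 * Sketch of the fast-path refill logic only.  The register accessors
 * below are hypothetical stand-ins, not a real kernel API.  Returns 1
 * if the miss was resolved, 0 if the slow path (handle_exception ->
 * do_page_fault) must run instead.
 */
#include <asm/page.h>		/* PAGE_SHIFT */
#include <asm/pgtable.h>	/* PGDIR_SHIFT, PTRS_PER_*, _PAGE_* */

extern unsigned long read_mmu_tea(void);	/* faulting address (MMU_TEA) */
extern unsigned long read_mmu_ttb(void);	/* page table base  (MMU_TTB) */
extern void write_mmu_ptel(unsigned long);	/* MMU_PTEL */
extern void mmu_ldtlb(void);			/* the ldtlb instruction */

static int tlb_miss_fast_path(void)
{
	unsigned long addr = read_mmu_tea();
	unsigned long *pgd = (unsigned long *)read_mmu_ttb();
	unsigned long pgd_ent, *ptep, pte;

	/* First level: the asm reuses one index mask for both levels,
	 * which is why the patch asserts PTRS_PER_PGD == PTRS_PER_PTE. */
	pgd_ent = pgd[(addr >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)];
	if (!pgd_ent)
		return 0;			/* no pte table: slow path */

	/* Second level. */
	ptep = (unsigned long *)pgd_ent + ((addr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1));
	pte = *ptep;
	if (!(pte & _PAGE_PRESENT))
		return 0;			/* not present: slow path */

	/* Load only the hardware bits into PTEL, then refill the TLB. */
	write_mmu_ptel(pte & _PAGE_FLAGS_HARDWARE_MASK);

	if (!(pte & _PAGE_ACCESSED))
		*ptep = pte | _PAGE_ACCESSED;	/* mark young on first touch */

	mmu_ldtlb();
	return 1;
}

Note also that call_dpf in the same file no longer goes through a C __do_page_fault() helper first: it jumps straight to do_page_fault(), because the cases that helper resolved are now handled in the refill path above.
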
+3 −0
@@ -379,6 +379,9 @@ config CPU_HAS_SR_RB
	  See <file:Documentation/sh/register-banks.txt> for further
	  information on SR.RB and register banking in the kernel in general.

config CPU_HAS_PTEA
	bool

endmenu

menu "Timer support"
+186 −20
@@ -13,8 +13,10 @@
#include <linux/linkage.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpu/mmu_context.h>
#include <asm/unistd.h>
#include <asm/cpu/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/page.h>

! NOTE:
! GNU as (as of 2.9.1) changes bf/s into bt/s and bra, when the address
@@ -136,29 +138,14 @@ ENTRY(tlb_protection_violation_store)

call_dpf:
	mov.l	1f, r0
	mov	r5, r8
	mov.l	@r0, r6
	mov	r6, r9
	mov.l	2f, r0
	sts	pr, r10
	jsr	@r0
	 mov	r15, r4
	!
	tst	r0, r0
	bf/s	0f
	 lds	r10, pr
	rts
	 nop
0:	sti
 	mov.l	@r0, r6		! address
	mov.l	3f, r0
	mov	r9, r6
	mov	r8, r5
	sti
	jmp	@r0
	 mov	r15, r4
 	 mov	r15, r4		! regs

	.align 2
1:	.long	MMU_TEA
2:	.long	__do_page_fault
3:	.long	do_page_fault

	.align	2
@@ -344,9 +331,176 @@ general_exception:
2:	.long	ret_from_exception
!
!

/* This code makes some assumptions to improve performance.
 * Make sure they are still true. */
#if PTRS_PER_PGD != PTRS_PER_PTE
#error PGD and PTE sizes don't match
#endif

/* gas doesn't flag impossible values for mov #immediate as an error */
#if (_PAGE_PRESENT >> 2) > 0x7f
#error cannot load PAGE_PRESENT as an immediate
#endif
#if _PAGE_DIRTY > 0x7f
#error cannot load PAGE_DIRTY as an immediate
#endif
#if (_PAGE_PRESENT << 2) != _PAGE_ACCESSED
#error cannot derive PAGE_ACCESSED from PAGE_PRESENT
#endif
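
These guards exist because the SH mov #imm, Rn instruction only encodes an 8-bit immediate, sign-extended to 32 bits, and (as the comment above notes) gas does not reject values that cannot be encoded. Keeping _PAGE_DIRTY and _PAGE_PRESENT >> 2 at or below 0x7f means each can be materialized with a single mov, and _PAGE_ACCESSED is then produced from _PAGE_PRESENT with a shll2 instead of a second constant load. A trivial C sketch of the encoding constraint (not part of the patch):

/* SH "mov #imm, Rn" sign-extends an 8-bit immediate, so only values in
 * the range -128..127 can be loaded in one instruction; anything larger
 * needs a mov.w/mov.l from a constant pool instead. */
static int fits_mov_imm(long v)
{
	return v >= -128 && v <= 127;
}
/* e.g. fits_mov_imm(0x7f) == 1, fits_mov_imm(0x100) == 0 */
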

#if defined(CONFIG_CPU_SH4)
#define ldmmupteh(r)	mov.l	8f, r
#else
#define ldmmupteh(r)	mov	#MMU_PTEH, r
#endif

	.balign 	1024,0,1024
tlb_miss:
	mov.l	1f, k2
#ifdef COUNT_EXCEPTIONS
	! Increment the counts
	mov.l	9f, k1
	mov.l	@k1, k2
	add	#1, k2
	mov.l	k2, @k1
#endif

	! k0 scratch
	! k1 pgd and pte pointers
	! k2 faulting address
	! k3 pgd and pte index masks
	! k4 shift

	! Load up the pgd entry (k1)

	ldmmupteh(k0)			!  9 LS (latency=2)	MMU_PTEH

	mov.w	4f, k3			!  8 LS (latency=2)	(PTRS_PER_PGD-1) << 2
	mov	#-(PGDIR_SHIFT-2), k4	!  6 EX

	mov.l	@(MMU_TEA-MMU_PTEH,k0), k2	! 18 LS (latency=2)

	mov.l	@(MMU_TTB-MMU_PTEH,k0), k1	! 18 LS (latency=2)

	mov	k2, k0			!   5 MT (latency=0)
	shld	k4, k0			!  99 EX

	and	k3, k0			!  78 EX

	mov.l	@(k0, k1), k1		!  21 LS (latency=2)
	mov	#-(PAGE_SHIFT-2), k4	!   6 EX

	! Load up the pte entry (k2)

	mov	k2, k0			!   5 MT (latency=0)
	shld	k4, k0			!  99 EX

	tst	k1, k1			!  86 MT

	bt	20f			! 110 BR

	and	k3, k0			!  78 EX
	mov.w	5f, k4			!   8 LS (latency=2)	_PAGE_PRESENT

	mov.l	@(k0, k1), k2		!  21 LS (latency=2)
	add	k0, k1			!  49 EX

#ifdef CONFIG_CPU_HAS_PTEA
	! Test the entry for present and _PAGE_ACCESSED

	mov	#-28, k3		!   6 EX
	mov	k2, k0			!   5 MT (latency=0)

	tst	k4, k2			!  68 MT
	shld	k3, k0			!  99 EX

	bt	20f			! 110 BR

	! Set PTEA register
	! MMU_PTEA = ((pteval >> 28) & 0xe) | (pteval & 0x1)
	!
	! k0=pte>>28, k1=pte*, k2=pte, k3=<unused>, k4=_PAGE_PRESENT

	and	#0xe, k0		!  79 EX

	mov	k0, k3			!   5 MT (latency=0)
	mov	k2, k0			!   5 MT (latency=0)

	and	#1, k0			!  79 EX

	or	k0, k3			!  82 EX

	ldmmupteh(k0)			!   9 LS (latency=2)
	shll2	k4			! 101 EX		_PAGE_ACCESSED

	tst	k4, k2			!  68 MT

	mov.l	k3, @(MMU_PTEA-MMU_PTEH,k0)	! 27 LS

	mov.l	7f, k3			!   9 LS (latency=2)	_PAGE_FLAGS_HARDWARE_MASK

	! k0=MMU_PTEH, k1=pte*, k2=pte, k3=_PAGE_FLAGS_HARDWARE, k4=_PAGE_ACCESSED
#else

	! Test the entry for present and _PAGE_ACCESSED

	mov.l	7f, k3			!   9 LS (latency=2)	_PAGE_FLAGS_HARDWARE_MASK
	tst	k4, k2			!  68 MT

	shll2	k4			! 101 EX		_PAGE_ACCESSED
	ldmmupteh(k0)			!   9 LS (latency=2)

	bt	20f			! 110 BR
	tst	k4, k2			!  68 MT

	! k0=MMU_PTEH, k1=pte*, k2=pte, k3=_PAGE_FLAGS_HARDWARE, k4=_PAGE_ACCESSED

#endif

	! Set up the entry

	and	k2, k3			!  78 EX
	bt/s	10f			! 108 BR

	 mov.l	k3, @(MMU_PTEL-MMU_PTEH,k0)	! 27 LS

	ldtlb				! 128 CO

	! At least one instruction between ldtlb and rte
	nop				! 119 NOP

	rte				! 126 CO

	 nop				! 119 NOP


10:	or	k4, k2			!  82 EX

	ldtlb				! 128 CO

	! At least one instruction between ldtlb and rte
	mov.l	k2, @k1			!  27 LS

	rte				! 126 CO

	! Note we cannot execute mov here, because it is executed after
	! restoring SSR, so would be executed in user space.
	 nop				! 119 NOP


	.align 5
	! One cache line if possible...
1:	.long	swapper_pg_dir
4:	.short	(PTRS_PER_PGD-1) << 2
5:	.short	_PAGE_PRESENT
7:	.long	_PAGE_FLAGS_HARDWARE_MASK
8:	.long	MMU_PTEH
#ifdef COUNT_EXCEPTIONS
9:	.long	exception_count_miss
#endif

	! Either pgd or pte not present
20:	mov.l	1f, k2
	mov.l	4f, k3
	bra	handle_exception
	 mov.l	@k2, k2
@@ -496,6 +650,15 @@ skip_save:
	bf	interrupt_exception
	shlr2	r8
	shlr	r8

#ifdef COUNT_EXCEPTIONS
	mov.l	5f, r9
	add	r8, r9
	mov.l	@r9, r10
	add	#1, r10
	mov.l	r10, @r9
#endif

	mov.l	4f, r9
	add	r8, r9
	mov.l	@r9, r9
@@ -509,6 +672,9 @@ skip_save:
2:	.long	0x000080f0	! FD=1, IMASK=15
3:	.long	0xcfffffff	! RB=0, BL=0
4:	.long	exception_handling_table
#ifdef COUNT_EXCEPTIONS
5:	.long	exception_count_table
#endif

interrupt_exception:
	mov.l	1f, r9
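
One detail worth calling out from the CONFIG_CPU_HAS_PTEA branch of the handler above: the PTEA register value is derived from the PTE itself, using the packing spelled out in the handler's own comment. Expressed as plain C (an illustrative helper, not code from the patch):

/* PTEA takes the upper attribute bits of the PTE plus its lowest bit,
 * exactly as the asm comment states:
 *   MMU_PTEA = ((pteval >> 28) & 0xe) | (pteval & 0x1)
 */
static inline unsigned long make_ptea(unsigned long pteval)
{
	return ((pteval >> 28) & 0xe) | (pteval & 0x1);
}

On CPUs without PTEA the whole block is compiled out; the probe.c and Kconfig hunks that follow move the CPU_HAS_PTEA decision from per-subtype flags into Kconfig.
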
+11 −8
@@ -79,16 +79,16 @@ int __init detect_cpu_and_cache_system(void)
	case 0x205:
		cpu_data->type = CPU_SH7750;
		cpu_data->flags |= CPU_HAS_P2_FLUSH_BUG | CPU_HAS_FPU |
				   CPU_HAS_PERF_COUNTER | CPU_HAS_PTEA;
				   CPU_HAS_PERF_COUNTER;
		break;
	case 0x206:
		cpu_data->type = CPU_SH7750S;
		cpu_data->flags |= CPU_HAS_P2_FLUSH_BUG | CPU_HAS_FPU |
				   CPU_HAS_PERF_COUNTER | CPU_HAS_PTEA;
				   CPU_HAS_PERF_COUNTER;
		break;
	case 0x1100:
		cpu_data->type = CPU_SH7751;
		cpu_data->flags |= CPU_HAS_FPU | CPU_HAS_PTEA;
		cpu_data->flags |= CPU_HAS_FPU;
		break;
	case 0x2000:
		cpu_data->type = CPU_SH73180;
@@ -126,23 +126,22 @@ int __init detect_cpu_and_cache_system(void)
		break;
	case 0x8000:
		cpu_data->type = CPU_ST40RA;
		cpu_data->flags |= CPU_HAS_FPU | CPU_HAS_PTEA;
		cpu_data->flags |= CPU_HAS_FPU;
		break;
	case 0x8100:
		cpu_data->type = CPU_ST40GX1;
		cpu_data->flags |= CPU_HAS_FPU | CPU_HAS_PTEA;
		cpu_data->flags |= CPU_HAS_FPU;
		break;
	case 0x700:
		cpu_data->type = CPU_SH4_501;
		cpu_data->icache.ways = 2;
		cpu_data->dcache.ways = 2;
		cpu_data->flags |= CPU_HAS_PTEA;
		break;
	case 0x600:
		cpu_data->type = CPU_SH4_202;
		cpu_data->icache.ways = 2;
		cpu_data->dcache.ways = 2;
		cpu_data->flags |= CPU_HAS_FPU | CPU_HAS_PTEA;
		cpu_data->flags |= CPU_HAS_FPU;
		break;
	case 0x500 ... 0x501:
		switch (prr) {
@@ -160,7 +159,7 @@ int __init detect_cpu_and_cache_system(void)
		cpu_data->icache.ways = 2;
		cpu_data->dcache.ways = 2;

		cpu_data->flags |= CPU_HAS_FPU | CPU_HAS_PTEA;
		cpu_data->flags |= CPU_HAS_FPU;

		break;
	default:
@@ -173,6 +172,10 @@ int __init detect_cpu_and_cache_system(void)
	cpu_data->dcache.ways = 1;
#endif

#ifdef CONFIG_CPU_HAS_PTEA
	cpu_data->flags |= CPU_HAS_PTEA;
#endif

	/*
	 * On anything that's not a direct-mapped cache, look to the CVR
	 * for I/D-cache specifics.
+1 −0
@@ -20,6 +20,7 @@ config CPU_SH4
	bool
	select CPU_HAS_INTEVT
	select CPU_HAS_SR_RB
	select CPU_HAS_PTEA if !CPU_SUBTYPE_ST40

config CPU_SH4A
	bool
+0 −86
@@ -223,89 +223,3 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
	if (!user_mode(regs))
		goto no_context;
}

#ifdef CONFIG_SH_STORE_QUEUES
/*
 * This is a special case for the SH-4 store queues, as pages for this
 * space still need to be faulted in before it's possible to flush the
 * store queue cache for writeout to the remapped region.
 */
#define P3_ADDR_MAX		(P4SEG_STORE_QUE + 0x04000000)
#else
#define P3_ADDR_MAX		P4SEG
#endif

/*
 * Called with interrupts disabled.
 */
asmlinkage int __kprobes __do_page_fault(struct pt_regs *regs,
					 unsigned long writeaccess,
					 unsigned long address)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	pte_t entry;
	struct mm_struct *mm = current->mm;
	spinlock_t *ptl;
	int ret = 1;

#ifdef CONFIG_SH_KGDB
	if (kgdb_nofault && kgdb_bus_err_hook)
		kgdb_bus_err_hook();
#endif

	/*
	 * We don't take page faults for P1, P2, and parts of P4, these
	 * are always mapped, whether it be due to legacy behaviour in
	 * 29-bit mode, or due to PMB configuration in 32-bit mode.
	 */
	if (address >= P3SEG && address < P3_ADDR_MAX) {
		pgd = pgd_offset_k(address);
		mm = NULL;
	} else {
		if (unlikely(address >= TASK_SIZE || !mm))
			return 1;

		pgd = pgd_offset(mm, address);
	}

	pud = pud_offset(pgd, address);
	if (pud_none_or_clear_bad(pud))
		return 1;
	pmd = pmd_offset(pud, address);
	if (pmd_none_or_clear_bad(pmd))
		return 1;

	if (mm)
		pte = pte_offset_map_lock(mm, pmd, address, &ptl);
	else
		pte = pte_offset_kernel(pmd, address);

	entry = *pte;
	if (unlikely(pte_none(entry) || pte_not_present(entry)))
		goto unlock;
	if (unlikely(writeaccess && !pte_write(entry)))
		goto unlock;

	if (writeaccess)
		entry = pte_mkdirty(entry);
	entry = pte_mkyoung(entry);

#ifdef CONFIG_CPU_SH4
	/*
	 * ITLB is not affected by "ldtlb" instruction.
	 * So, we need to flush the entry by ourselves.
	 */
	__flush_tlb_page(get_asid(), address & PAGE_MASK);
#endif

	set_pte(pte, entry);
	update_mmu_cache(NULL, address, entry);
	ret = 0;
unlock:
	if (mm)
		pte_unmap_unlock(pte, ptl);
	return ret;
}