Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 4c5fe5db authored by John David Anglin, committed by Helge Deller
Browse files

parisc: Optimize cache flush algorithms



The attached patch implements three optimizations:

1) Loops in flush_user_dcache_range_asm, flush_kernel_dcache_range_asm,
purge_kernel_dcache_range_asm, flush_user_icache_range_asm, and
flush_kernel_icache_range_asm are unrolled to reduce branch overhead.

2) The static branch prediction for cmpb instructions in pacache.S have
been reviewed and the operand order adjusted where necessary.

3) For flush routines in cache.c, we purge rather than flush when we have no
context.  The pdc instruction at level 0 is not required to write back
dirty lines to memory. This provides a performance improvement over the
fdc instruction if the feature is implemented.

Version 2 adds alternative patching.

The patch provides an average improvement of about 2%.

Signed-off-by: John David Anglin <dave.anglin@bell.net>
Signed-off-by: Helge Deller <deller@gmx.de>
parent 5a23237f
Loading
Loading
Loading
Loading
+27 −6
Original line number Diff line number Diff line
@@ -36,6 +36,7 @@ EXPORT_SYMBOL(dcache_stride);

void flush_dcache_page_asm(unsigned long phys_addr, unsigned long vaddr);
EXPORT_SYMBOL(flush_dcache_page_asm);
void purge_dcache_page_asm(unsigned long phys_addr, unsigned long vaddr);
void flush_icache_page_asm(unsigned long phys_addr, unsigned long vaddr);


@@ -303,6 +304,17 @@ __flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr,
	preempt_enable();
}

static inline void
__purge_cache_page(struct vm_area_struct *vma, unsigned long vmaddr,
		   unsigned long physaddr)
{
	preempt_disable();
	purge_dcache_page_asm(physaddr, vmaddr);
	if (vma->vm_flags & VM_EXEC)
		flush_icache_page_asm(physaddr, vmaddr);
	preempt_enable();
}

void flush_dcache_page(struct page *page)
{
	struct address_space *mapping = page_mapping_file(page);
@@ -563,9 +575,12 @@ void flush_cache_mm(struct mm_struct *mm)
			pfn = pte_pfn(*ptep);
			if (!pfn_valid(pfn))
				continue;
			if (unlikely(mm->context))
			if (unlikely(mm->context)) {
				flush_tlb_page(vma, addr);
				__flush_cache_page(vma, addr, PFN_PHYS(pfn));
			} else {
				__purge_cache_page(vma, addr, PFN_PHYS(pfn));
			}
		}
	}
}
@@ -600,9 +615,12 @@ void flush_cache_range(struct vm_area_struct *vma,
			continue;
		pfn = pte_pfn(*ptep);
		if (pfn_valid(pfn)) {
			if (unlikely(vma->vm_mm->context))
			if (unlikely(vma->vm_mm->context)) {
				flush_tlb_page(vma, addr);
				__flush_cache_page(vma, addr, PFN_PHYS(pfn));
			} else {
				__purge_cache_page(vma, addr, PFN_PHYS(pfn));
			}
		}
	}
}
@@ -611,9 +629,12 @@ void
flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long pfn)
{
	if (pfn_valid(pfn)) {
		if (likely(vma->vm_mm->context))
		if (likely(vma->vm_mm->context)) {
			flush_tlb_page(vma, vmaddr);
			__flush_cache_page(vma, vmaddr, PFN_PHYS(pfn));
		} else {
			__purge_cache_page(vma, vmaddr, PFN_PHYS(pfn));
		}
	}
}

+202 −14
Original line number Diff line number Diff line
@@ -838,7 +838,6 @@ ENTRY_CFI(flush_dcache_page_asm)
	add		%r28, %r25, %r25
	sub		%r25, r31, %r25


1:	fdc,m		r31(%r28)
	fdc,m		r31(%r28)
	fdc,m		r31(%r28)
@@ -854,7 +853,7 @@ ENTRY_CFI(flush_dcache_page_asm)
	fdc,m		r31(%r28)
	fdc,m		r31(%r28)
	fdc,m		r31(%r28)
	cmpb,COND(<<)	%r28, %r25,1b
	cmpb,COND(>>)	%r25, %r28, 1b /* predict taken */
	fdc,m		r31(%r28)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
@@ -863,6 +862,67 @@ ENTRY_CFI(flush_dcache_page_asm)
	nop
ENDPROC_CFI(flush_dcache_page_asm)

ENTRY_CFI(purge_dcache_page_asm)
	ldil		L%(TMPALIAS_MAP_START), %r28
#ifdef CONFIG_64BIT
#if (TMPALIAS_MAP_START >= 0x80000000)
	depdi		0, 31,32, %r28		/* clear any sign extension */
#endif
	convert_phys_for_tlb_insert20 %r26	/* convert phys addr to tlb insert format */
	depd		%r25, 63,22, %r28	/* Form aliased virtual address 'to' */
	depdi		0, 63,PAGE_SHIFT, %r28	/* Clear any offset bits */
#else
	extrw,u		%r26, 24,25, %r26	/* convert phys addr to tlb insert format */
	depw		%r25, 31,22, %r28	/* Form aliased virtual address 'to' */
	depwi		0, 31,PAGE_SHIFT, %r28	/* Clear any offset bits */
#endif

	/* Purge any old translation */

#ifdef CONFIG_PA20
	pdtlb,l		%r0(%r28)
#else
	tlb_lock	%r20,%r21,%r22
0:	pdtlb		%r0(%r28)
	tlb_unlock	%r20,%r21,%r22
	ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
#endif

88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), r31

#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25
#endif
	add		%r28, %r25, %r25
	sub		%r25, r31, %r25

1:      pdc,m		r31(%r28)
	pdc,m		r31(%r28)
	pdc,m		r31(%r28)
	pdc,m		r31(%r28)
	pdc,m		r31(%r28)
	pdc,m		r31(%r28)
	pdc,m		r31(%r28)
	pdc,m		r31(%r28)
	pdc,m		r31(%r28)
	pdc,m		r31(%r28)
	pdc,m		r31(%r28)
	pdc,m		r31(%r28)
	pdc,m		r31(%r28)
	pdc,m		r31(%r28)
	pdc,m		r31(%r28)
	cmpb,COND(>>)	%r25, %r28, 1b /* predict taken */
	pdc,m		r31(%r28)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(purge_dcache_page_asm)

ENTRY_CFI(flush_icache_page_asm)
	ldil		L%(TMPALIAS_MAP_START), %r28
#ifdef CONFIG_64BIT
@@ -908,7 +968,6 @@ ENTRY_CFI(flush_icache_page_asm)
	add		%r28, %r25, %r25
	sub		%r25, %r31, %r25


	/* fic only has the type 26 form on PA1.1, requiring an
	 * explicit space specification, so use %sr4 */
1:      fic,m		%r31(%sr4,%r28)
@@ -926,7 +985,7 @@ ENTRY_CFI(flush_icache_page_asm)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	cmpb,COND(<<)	%r28, %r25,1b
	cmpb,COND(>>)	%r25, %r28, 1b /* predict taken */
	fic,m		%r31(%sr4,%r28)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
@@ -947,7 +1006,6 @@ ENTRY_CFI(flush_kernel_dcache_page_asm)
	add		%r26, %r25, %r25
	sub		%r25, %r23, %r25


1:      fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
@@ -963,7 +1021,7 @@ ENTRY_CFI(flush_kernel_dcache_page_asm)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	cmpb,COND(<<)		%r26, %r25,1b
	cmpb,COND(>>)	%r25, %r26, 1b /* predict taken */
	fdc,m		%r23(%r26)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
@@ -999,7 +1057,7 @@ ENTRY_CFI(purge_kernel_dcache_page_asm)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	cmpb,COND(<<)		%r26, %r25, 1b
	cmpb,COND(>>)	%r25, %r26, 1b /* predict taken */
	pdc,m		%r23(%r26)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
@@ -1014,7 +1072,33 @@ ENTRY_CFI(flush_user_dcache_range_asm)
	ldo		-1(%r23), %r21
	ANDCM		%r26, %r21, %r26

1:      cmpb,COND(<<),n	%r26, %r25, 1b
#ifdef CONFIG_64BIT
	depd,z		%r23, 59, 60, %r21
#else
	depw,z		%r23, 27, 28, %r21
#endif
	add		%r26, %r21, %r22
	cmpb,COND(>>),n	%r22, %r25, 2f /* predict not taken */
1:	add		%r22, %r21, %r22
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	cmpb,COND(<<=)	%r22, %r25, 1b /* predict taken */
	fdc,m		%r23(%sr3, %r26)

2:	cmpb,COND(>>),n	%r25, %r26, 2b
	fdc,m		%r23(%sr3, %r26)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
@@ -1029,7 +1113,33 @@ ENTRY_CFI(flush_kernel_dcache_range_asm)
	ldo		-1(%r23), %r21
	ANDCM		%r26, %r21, %r26

1:      cmpb,COND(<<),n	%r26, %r25,1b
#ifdef CONFIG_64BIT
	depd,z		%r23, 59, 60, %r21
#else
	depw,z		%r23, 27, 28, %r21
#endif
	add		%r26, %r21, %r22
	cmpb,COND(>>),n	%r22, %r25, 2f /* predict not taken */
1:	add		%r22, %r21, %r22
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	cmpb,COND(<<=)	%r22, %r25, 1b /* predict taken */
	fdc,m		%r23(%r26)

2:	cmpb,COND(>>),n	%r25, %r26, 2b /* predict taken */
	fdc,m		%r23(%r26)

	sync
@@ -1045,7 +1155,33 @@ ENTRY_CFI(purge_kernel_dcache_range_asm)
	ldo		-1(%r23), %r21
	ANDCM		%r26, %r21, %r26

1:      cmpb,COND(<<),n	%r26, %r25,1b
#ifdef CONFIG_64BIT
	depd,z		%r23, 59, 60, %r21
#else
	depw,z		%r23, 27, 28, %r21
#endif
	add		%r26, %r21, %r22
	cmpb,COND(>>),n	%r22, %r25, 2f /* predict not taken */
1:	add		%r22, %r21, %r22
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	cmpb,COND(<<=)	%r22, %r25, 1b /* predict taken */
	pdc,m		%r23(%r26)

2:	cmpb,COND(>>),n	%r25, %r26, 2b /* predict taken */
	pdc,m		%r23(%r26)

	sync
@@ -1061,7 +1197,33 @@ ENTRY_CFI(flush_user_icache_range_asm)
	ldo		-1(%r23), %r21
	ANDCM		%r26, %r21, %r26

1:      cmpb,COND(<<),n	%r26, %r25,1b
#ifdef CONFIG_64BIT
	depd,z		%r23, 59, 60, %r21
#else
	depw,z		%r23, 27, 28, %r21
#endif
	add		%r26, %r21, %r22
	cmpb,COND(>>),n	%r22, %r25, 2f /* predict not taken */
1:	add		%r22, %r21, %r22
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	cmpb,COND(<<=)	%r22, %r25, 1b /* predict taken */
	fic,m		%r23(%sr3, %r26)

2:	cmpb,COND(>>),n	%r25, %r26, 2b
	fic,m		%r23(%sr3, %r26)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
@@ -1098,7 +1260,7 @@ ENTRY_CFI(flush_kernel_icache_page)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	cmpb,COND(<<)		%r26, %r25, 1b
	cmpb,COND(>>)	%r25, %r26, 1b /* predict taken */
	fic,m		%r23(%sr4, %r26)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
@@ -1113,7 +1275,33 @@ ENTRY_CFI(flush_kernel_icache_range_asm)
	ldo		-1(%r23), %r21
	ANDCM		%r26, %r21, %r26

1:      cmpb,COND(<<),n	%r26, %r25, 1b
#ifdef CONFIG_64BIT
	depd,z		%r23, 59, 60, %r21
#else
	depw,z		%r23, 27, 28, %r21
#endif
	add		%r26, %r21, %r22
	cmpb,COND(>>),n	%r22, %r25, 2f /* predict not taken */
1:	add		%r22, %r21, %r22
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	cmpb,COND(<<=)	%r22, %r25, 1b /* predict taken */
	fic,m		%r23(%sr4, %r26)

2:	cmpb,COND(>>),n	%r25, %r26, 2b /* predict taken */
	fic,m		%r23(%sr4, %r26)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)