
Commit c89ca8ab authored by Scott Wood

powerpc/e6500: Optimize hugepage TLB misses

Some workloads take a lot of TLB misses despite using traditional
hugepages.  Handle these TLB misses in the asm fastpath rather than
going through a bunch of C code.  With this patch I measured around a
5x speedup in handling hugepage TLB misses.

Signed-off-by: Scott Wood <scottwood@freescale.com>
parent fb326e98
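
The point of the patch is to redo the C hugepage walk in assembly, so a rough C rendering of the new tlb_miss_huge_e6500 path may help readers who do not speak PowerPC asm. This is a minimal, illustrative sketch under a simplified Book3E model, not the actual implementation: the write_mas* helpers and huge_tlb_miss are hypothetical stand-ins (the former for the mtspr instructions), the numeric values behind the macros are assumptions rather than the kernel's exact definitions, and ESEL selection plus the final tlbwe are left to the shared path, as in the diff below.

#include <stdint.h>
#include <stdio.h>

/* Macro names are taken from the diff; the values are assumed for
 * illustration and may not match the kernel headers exactly. */
#define HUGEPD_SHIFT_MASK 0x3fULL      /* low bits of a hugepd: page shift */
#define MAS1_IND          0x00002000u  /* "indirect entry" flag in MAS1 */
#define MAS1_TSIZE_SHIFT  7
#define _PAGE_DIRTY       0x1000ULL    /* assumed position of the dirty bit */
#define MAS3_SW           0x04ULL      /* assumed supervisor-write bit */
#define MAS3_UW           0x08ULL      /* assumed user-write bit */

/* Hypothetical stand-ins for mtspr SPRN_MAS1 / SPRN_MAS2 / SPRN_MAS7_MAS3. */
static void write_mas1(uint32_t v)      { printf("MAS1      = %#x\n", v); }
static void write_mas2(uint64_t v)      { printf("MAS2      = %#llx\n", (unsigned long long)v); }
static void write_mas7_mas3(uint64_t v) { printf("MAS7_MAS3 = %#llx\n", (unsigned long long)v); }

/*
 * Rough C rendering of tlb_miss_huge_e6500.  The walker branches here
 * when a PGD/PUD/PMD entry has its sign bit clear, i.e. the entry is
 * either empty (a real fault) or a hugepd, not a next-level pointer.
 */
static int huge_tlb_miss(uint64_t pd_entry, uint64_t ea, uint32_t mas1,
                         uint64_t wimge, uint64_t rpn_and_perms)
{
    if (pd_entry == 0)
        return -1;                      /* beq  tlb_miss_fault_e6500 */

    unsigned int shift = pd_entry & HUGEPD_SHIFT_MASK;  /* r15 = psize */

    /* Strip the size bits to recover the pointer to the huge PTE and
     * load it.  The real code also sets PD_HUGE, the top address bit
     * that kernel pointers carry and the hugepd encoding strips; that
     * step is omitted so this userspace demo can actually run. */
    uint64_t pte = *(uint64_t *)(uintptr_t)(pd_entry & ~HUGEPD_SHIFT_MASK);

    /* MAS1: clear IND, set TSIZE (tsize = shift - 10, i.e. 2^tsize KB). */
    mas1 = (mas1 & ~MAS1_IND) | ((shift - 10) << MAS1_TSIZE_SHIFT);
    write_mas1(mas1);

    /* MAS2: EA aligned down to the huge-page boundary, plus WIMGE. */
    write_mas2((ea & (uint64_t)-(INT64_C(1) << shift)) | wimge);

    /* MAS7/MAS3: physical page number and permission bits, with write
     * permission withheld while the page is clean so the first store
     * faults and lets the kernel set the dirty bit. */
    if (!(pte & _PAGE_DIRTY))
        rpn_and_perms &= ~(MAS3_SW | MAS3_UW);
    write_mas7_mas3(rpn_and_perms);
    return 0;                           /* b  tlb_miss_huge_done_e6500 */
}

int main(void)
{
    static _Alignas(64) uint64_t pte = _PAGE_DIRTY | 0x40000000ULL;
    /* Fake hugepd for a 2 MB page: the PTE's address with shift 21
     * encoded in the low bits (alignment keeps those bits free). */
    uint64_t hugepd = (uint64_t)(uintptr_t)&pte | 21;
    return huge_tlb_miss(hugepd, 0x12345678ULL, /*mas1=*/0, /*wimge=*/0,
                         0x40000000ULL | MAS3_SW | MAS3_UW);
}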
+48 −3
@@ -398,18 +398,18 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_SMT)
 	rldicl	r15,r16,64-PUD_SHIFT+3,64-PUD_INDEX_SIZE-3
 	clrrdi	r15,r15,3
 	cmpdi	cr0,r14,0
-	bge	tlb_miss_fault_e6500	/* Bad pgd entry or hugepage; bail */
+	bge	tlb_miss_huge_e6500	/* Bad pgd entry or hugepage; bail */
 	ldx	r14,r14,r15		/* grab pud entry */
 
 	rldicl	r15,r16,64-PMD_SHIFT+3,64-PMD_INDEX_SIZE-3
 	clrrdi	r15,r15,3
 	cmpdi	cr0,r14,0
-	bge	tlb_miss_fault_e6500
+	bge	tlb_miss_huge_e6500
 	ldx	r14,r14,r15		/* Grab pmd entry */
 
 	mfspr	r10,SPRN_MAS0
 	cmpdi	cr0,r14,0
-	bge	tlb_miss_fault_e6500
+	bge	tlb_miss_huge_e6500
 
 	/* Now we build the MAS for a 2M indirect page:
 	 *
@@ -428,6 +428,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_SMT)
 	clrrdi	r15,r16,21		/* make EA 2M-aligned */
 	mtspr	SPRN_MAS2,r15
 
+tlb_miss_huge_done_e6500:
 	lbz	r15,TCD_ESEL_NEXT(r11)
 	lbz	r16,TCD_ESEL_MAX(r11)
 	lbz	r14,TCD_ESEL_FIRST(r11)
@@ -456,6 +457,50 @@ END_FTR_SECTION_IFSET(CPU_FTR_SMT)
 	tlb_epilog_bolted
 	rfi
 
+tlb_miss_huge_e6500:
+	beq	tlb_miss_fault_e6500
+	li	r10,1
+	andi.	r15,r14,HUGEPD_SHIFT_MASK@l /* r15 = psize */
+	rldimi	r14,r10,63,0		/* Set PD_HUGE */
+	xor	r14,r14,r15		/* Clear size bits */
+	ldx	r14,0,r14
+
+	/*
+	 * Now we build the MAS for a huge page.
+	 *
+	 * MAS 0   :	ESEL needs to be filled by software round-robin
+	 *		 - can be handled by indirect code
+	 * MAS 1   :	Need to clear IND and set TSIZE
+	 * MAS 2,3+7:	Needs to be redone similar to non-tablewalk handler
+	 */
+
+	subi	r15,r15,10		/* Convert psize to tsize */
+	mfspr	r10,SPRN_MAS1
+	rlwinm	r10,r10,0,~MAS1_IND
+	rlwimi	r10,r15,MAS1_TSIZE_SHIFT,MAS1_TSIZE_MASK
+	mtspr	SPRN_MAS1,r10
+
+	li	r10,-0x400
+	sld	r15,r10,r15		/* Generate mask based on size */
+	and	r10,r16,r15
+	rldicr	r15,r14,64-(PTE_RPN_SHIFT-PAGE_SHIFT),63-PAGE_SHIFT
+	rlwimi	r10,r14,32-19,27,31	/* Insert WIMGE */
+	clrldi	r15,r15,PAGE_SHIFT	/* Clear crap at the top */
+	rlwimi	r15,r14,32-8,22,25	/* Move in U bits */
+	mtspr	SPRN_MAS2,r10
+	andi.	r10,r14,_PAGE_DIRTY
+	rlwimi	r15,r14,32-2,26,31	/* Move in BAP bits */
+
+	/* Mask out SW and UW if !DIRTY (XXX optimize this !) */
+	bne	1f
+	li	r10,MAS3_SW|MAS3_UW
+	andc	r15,r15,r10
+1:
+	mtspr	SPRN_MAS7_MAS3,r15
+
+	mfspr	r10,SPRN_MAS0
+	b	tlb_miss_huge_done_e6500
+
 tlb_miss_kernel_e6500:
 	ld	r14,PACA_KERNELPGD(r13)
 	cmpldi	cr1,r15,8		/* Check for vmalloc region */
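
Two of the added instructions are dense enough to be worth unpacking. "subi r15,r15,10" converts the page shift into a TSIZE value because, per the conversion the code itself performs, MAS1[TSIZE] encodes the page size as 2^TSIZE KB, so tsize is simply shift - 10. "li r10,-0x400; sld r15,r10,r15" then builds the alignment mask: -0x400 is -(1 << 10), and shifting it left by tsize yields -(1 << shift). A small self-checking sketch of the arithmetic, using a 2 MB page (shift 21) as the worked example:

#include <assert.h>
#include <stdint.h>

int main(void)
{
    unsigned int shift = 21;            /* a 2 MB huge page */

    /* subi r15,r15,10: MAS1[TSIZE] encodes the page size as
     * 2^TSIZE KB, so tsize = shift - 10 (2^11 KB = 2 MB). */
    unsigned int tsize = shift - 10;
    assert(tsize == 11);

    /* li r10,-0x400; sld r15,r10,r15: -0x400 is -(1 << 10), and
     * shifting it left by tsize gives -(1 << shift), the mask that
     * aligns the effective address down to the 2 MB boundary. */
    uint64_t mask = (uint64_t)(int64_t)-0x400 << tsize;
    assert(mask == (uint64_t)-(INT64_C(1) << shift));
    assert(mask == UINT64_C(0xffffffffffe00000));

    return 0;
}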