
Commit 2c8c53e2 authored by David Daney, committed by Ralf Baechle

MIPS: Optimize TLB handlers for Octeon CPUs



Octeon can use scratch registers in the TLB handlers.  Octeon II can
use LDX instructions.
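
The handlers gain one extra working register ($1) by saving it either
to a CP0 KScratch register or, when none is configured, to an 8-byte
slot in the CVMSEG local scratchpad. A minimal sketch of the
save/restore pair the patch emits (the helper names here are
illustrative only, not part of the patch):

	static void save_extra_reg(u32 **p, int scratch, int c0_scratch)
	{
		if (c0_scratch >= 0)	/* KScratch: CP0 register 31, select c0_scratch */
			UASM_i_MTC0(p, scratch, 31, c0_scratch);
		else			/* CVMSEG scratchpad, kernel slot 0 */
			UASM_i_SW(p, scratch, scratchpad_offset(0), 0);
	}

	static void restore_extra_reg(u32 **p, int scratch, int c0_scratch)
	{
		if (c0_scratch >= 0)
			UASM_i_MFC0(p, scratch, 31, c0_scratch);
		else
			UASM_i_LW(p, scratch, scratchpad_offset(0), 0);
	}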

Signed-off-by: David Daney <ddaney@caviumnetworks.com>
To: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/1904/


Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
parent bb3d68c3
+310 −51
@@ -77,6 +77,40 @@ static int use_bbit_insns(void)
	}
}

static int use_lwx_insns(void)
{
	switch (current_cpu_type()) {
	case CPU_CAVIUM_OCTEON2:
		return 1;
	default:
		return 0;
	}
}
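/*
 * Editorial sketch (not part of the patch): the Octeon II indexed
 * loads fold the pointer add into the load itself, saving one
 * instruction in the fast path, e.g.
 *
 *	if (use_lwx_insns())
 *		UASM_i_LWX(p, dst, index, base);	// one indexed load
 *	else {
 *		uasm_i_daddu(p, base, base, index);	// add in offset
 *		uasm_i_ld(p, dst, 0, base);		// then load
 *	}
 */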
#if defined(CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE) && \
    CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE > 0
static bool scratchpad_available(void)
{
	return true;
}
static int scratchpad_offset(int i)
{
	/*
	 * CVMSEG starts at address -32768 and extends for
	 * CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE 128-byte cache lines.
	 */
	i += 1; /* Kernel use starts at the top and works down. */
	return CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE * 128 - (8 * i) - 32768;
}
#else
static bool scratchpad_available(void)
{
	return false;
}
static int scratchpad_offset(int i)
{
	BUG();
	/* Unreachable, but keeps the non-void return type satisfied. */
	return 0;
}
#endif
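/*
 * Editorial note (not part of the patch): a worked example of the
 * offset arithmetic, assuming CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE = 2:
 * CVMSEG spans [-32768, -32768 + 2*128) = [-32768, -32512), and
 * scratchpad_offset(0) = 2*128 - 8*1 - 32768 = -32520, i.e. the
 * topmost 8-byte slot.  Slots move downward as i increases.
 */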
/*
 * Found by experiment: At least some revisions of the 4kc throw, under
 * some circumstances, a machine check exception triggered by invalid
@@ -187,7 +221,7 @@ static struct uasm_reloc relocs[128] __cpuinitdata;
static int check_for_high_segbits __cpuinitdata;
#endif

#ifdef CONFIG_MIPS_PGD_C0_CONTEXT
static int check_for_high_segbits __cpuinitdata;

static unsigned int kscratch_used_mask __cpuinitdata;

@@ -208,9 +242,12 @@ static int __cpuinit allocate_kscratch(void)
	return r;
}

static int scratch_reg __cpuinitdata;
static int pgd_reg __cpuinitdata;
enum vmalloc64_mode {not_refill, refill_scratch, refill_noscratch};
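/*
 * Editorial note: not_refill is used by the load/store/modify
 * handlers; refill_scratch marks a refill handler whose fault path
 * must restore the saved scratch value before jumping to
 * tlb_do_page_fault_0; refill_noscratch needs only a delay-slot nop
 * there.
 */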

#ifndef CONFIG_MIPS_PGD_C0_CONTEXT

#else /* !CONFIG_MIPS_PGD_C0_CONTEXT */
/*
 * CONFIG_MIPS_PGD_C0_CONTEXT implies 64 bit and lack of pgd_current,
 * so we cannot do r3000 under these circumstances.
@@ -481,8 +518,29 @@ static __cpuinit __maybe_unused void build_convert_pte_to_entrylo(u32 **p,
static __cpuinit void build_restore_pagemask(u32 **p,
					     struct uasm_reloc **r,
					     unsigned int tmp,
					     enum label_id lid)
					     enum label_id lid,
					     int restore_scratch)
{
	if (restore_scratch) {
		/* Reset default page size */
		if (PM_DEFAULT_MASK >> 16) {
			uasm_i_lui(p, tmp, PM_DEFAULT_MASK >> 16);
			uasm_i_ori(p, tmp, tmp, PM_DEFAULT_MASK & 0xffff);
			uasm_i_mtc0(p, tmp, C0_PAGEMASK);
			uasm_il_b(p, r, lid);
		} else if (PM_DEFAULT_MASK) {
			uasm_i_ori(p, tmp, 0, PM_DEFAULT_MASK);
			uasm_i_mtc0(p, tmp, C0_PAGEMASK);
			uasm_il_b(p, r, lid);
		} else {
			uasm_i_mtc0(p, 0, C0_PAGEMASK);
			uasm_il_b(p, r, lid);
		}
		if (scratch_reg > 0)
			UASM_i_MFC0(p, 1, 31, scratch_reg);
		else
			UASM_i_LW(p, 1, scratchpad_offset(0), 0);
	} else {
		/* Reset default page size */
		if (PM_DEFAULT_MASK >> 16) {
			uasm_i_lui(p, tmp, PM_DEFAULT_MASK >> 16);
@@ -498,12 +556,14 @@ static __cpuinit void build_restore_pagemask(u32 **p,
			uasm_i_mtc0(p, 0, C0_PAGEMASK);
		}
	}
}

static __cpuinit void build_huge_tlb_write_entry(u32 **p,
						 struct uasm_label **l,
						 struct uasm_reloc **r,
						 unsigned int tmp,
						 enum tlb_write_entry wmode)
						 enum tlb_write_entry wmode,
						 int restore_scratch)
{
	/* Set huge page tlb entry size */
	uasm_i_lui(p, tmp, PM_HUGE_MASK >> 16);
@@ -512,7 +572,7 @@ static __cpuinit void build_huge_tlb_write_entry(u32 **p,

	build_tlb_write_entry(p, l, r, wmode);

	build_restore_pagemask(p, r, tmp, label_leave);
	build_restore_pagemask(p, r, tmp, label_leave, restore_scratch);
}

/*
@@ -577,7 +637,7 @@ static __cpuinit void build_huge_handler_tail(u32 **p,
	UASM_i_SW(p, pte, 0, ptr);
#endif
	build_huge_update_entries(p, pte, ptr);
	build_huge_tlb_write_entry(p, l, r, pte, tlb_indexed);
	build_huge_tlb_write_entry(p, l, r, pte, tlb_indexed, 0);
}
#endif /* CONFIG_HUGETLB_PAGE */

@@ -674,7 +734,6 @@ build_get_pmde64(u32 **p, struct uasm_label **l, struct uasm_reloc **r,
#endif
}

enum vmalloc64_mode {not_refill, refill};
/*
 * BVADDR is the faulting address, PTR is scratch.
 * PTR will hold the pgd for vmalloc.
@@ -692,7 +751,7 @@ build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r,

	uasm_l_vmalloc(l, *p);

	if (mode == refill && check_for_high_segbits) {
	if (mode != not_refill && check_for_high_segbits) {
		if (single_insn_swpd) {
			uasm_il_bltz(p, r, bvaddr, label_vmalloc_done);
			uasm_i_lui(p, ptr, uasm_rel_hi(swpd));
@@ -715,7 +774,7 @@ build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r,
				uasm_i_daddiu(p, ptr, ptr, uasm_rel_lo(swpd));
		}
	}
	if (mode == refill && check_for_high_segbits) {
	if (mode != not_refill && check_for_high_segbits) {
		uasm_l_large_segbits_fault(l, *p);
		/*
		 * We get here if we are an xsseg address, or if we are
@@ -731,9 +790,17 @@ build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r,
		 */
		UASM_i_LA(p, ptr, (unsigned long)tlb_do_page_fault_0);
		uasm_i_jr(p, ptr);

		if (mode == refill_scratch) {
			if (scratch_reg > 0)
				UASM_i_MFC0(p, 1, 31, scratch_reg);
			else
				UASM_i_LW(p, 1, scratchpad_offset(0), 0);
		} else {
			uasm_i_nop(p);
		}
	}
}

#else /* !CONFIG_64BIT */

@@ -888,6 +955,185 @@ static void __cpuinit build_update_entries(u32 **p, unsigned int tmp,
#endif
}

struct mips_huge_tlb_info {
	int huge_pte;
	int restore_scratch;
};
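/*
 * Editorial note: the struct above is returned by
 * build_fast_tlb_refill_handler(); huge_pte names the register that
 * holds the PTE when the huge-page path is taken, and restore_scratch
 * tells the huge-page exit path whether the saved scratch register
 * must be restored.
 */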

static struct mips_huge_tlb_info __cpuinit
build_fast_tlb_refill_handler (u32 **p, struct uasm_label **l,
			       struct uasm_reloc **r, unsigned int tmp,
			       unsigned int ptr, int c0_scratch)
{
	struct mips_huge_tlb_info rv;
	unsigned int even, odd;
	int vmalloc_branch_delay_filled = 0;
	const int scratch = 1; /* Our extra working register */

	rv.huge_pte = scratch;
	rv.restore_scratch = 0;

	if (check_for_high_segbits) {
		UASM_i_MFC0(p, tmp, C0_BADVADDR);

		if (pgd_reg != -1)
			UASM_i_MFC0(p, ptr, 31, pgd_reg);
		else
			UASM_i_MFC0(p, ptr, C0_CONTEXT);

		if (c0_scratch >= 0)
			UASM_i_MTC0(p, scratch, 31, c0_scratch);
		else
			UASM_i_SW(p, scratch, scratchpad_offset(0), 0);

		uasm_i_dsrl_safe(p, scratch, tmp,
				 PGDIR_SHIFT + PGD_ORDER + PAGE_SHIFT - 3);
		uasm_il_bnez(p, r, scratch, label_vmalloc);

		if (pgd_reg == -1) {
			vmalloc_branch_delay_filled = 1;
			/* Clear lower 23 bits of context. */
			uasm_i_dins(p, ptr, 0, 0, 23);
		}
	} else {
		if (pgd_reg != -1)
			UASM_i_MFC0(p, ptr, 31, pgd_reg);
		else
			UASM_i_MFC0(p, ptr, C0_CONTEXT);

		UASM_i_MFC0(p, tmp, C0_BADVADDR);

		if (c0_scratch >= 0)
			UASM_i_MTC0(p, scratch, 31, c0_scratch);
		else
			UASM_i_SW(p, scratch, scratchpad_offset(0), 0);

		if (pgd_reg == -1)
			/* Clear lower 23 bits of context. */
			uasm_i_dins(p, ptr, 0, 0, 23);

		uasm_il_bltz(p, r, tmp, label_vmalloc);
	}

	if (pgd_reg == -1) {
		vmalloc_branch_delay_filled = 1;
		/* 1 0  1 0 1  << 6  xkphys cached */
		uasm_i_ori(p, ptr, ptr, 0x540);
		uasm_i_drotr(p, ptr, ptr, 11);
	}
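	/*
	 * Sketch of the trick (editorial): &pgd << 11 lives in the
	 * upper bits of C0_CONTEXT.  0x540 = 0b101_0100_0000 sets bits
	 * 10, 8 and 6; drotr by 11 rotates them into bits 63, 61 and
	 * 59, producing the 1 0 1 0 1 xkphys-cached prefix noted
	 * above, while the pgd pointer rotates down into its natural
	 * position in the same instruction.
	 */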

#ifdef __PAGETABLE_PMD_FOLDED
#define LOC_PTEP scratch
#else
#define LOC_PTEP ptr
#endif

	if (!vmalloc_branch_delay_filled)
		/* get pgd offset in bytes */
		uasm_i_dsrl_safe(p, scratch, tmp, PGDIR_SHIFT - 3);

	uasm_l_vmalloc_done(l, *p);

	/*
	 *                         tmp          ptr
	 * fall-through case =   badvaddr  *pgd_current
	 * vmalloc case      =   badvaddr  swapper_pg_dir
	 */

	if (vmalloc_branch_delay_filled)
		/* get pgd offset in bytes */
		uasm_i_dsrl_safe(p, scratch, tmp, PGDIR_SHIFT - 3);

#ifdef __PAGETABLE_PMD_FOLDED
	GET_CONTEXT(p, tmp); /* get context reg */
#endif
	uasm_i_andi(p, scratch, scratch, (PTRS_PER_PGD - 1) << 3);

	if (use_lwx_insns()) {
		UASM_i_LWX(p, LOC_PTEP, scratch, ptr);
	} else {
		uasm_i_daddu(p, ptr, ptr, scratch); /* add in pgd offset */
		uasm_i_ld(p, LOC_PTEP, 0, ptr); /* get pmd pointer */
	}

#ifndef __PAGETABLE_PMD_FOLDED
	/* get pmd offset in bytes */
	uasm_i_dsrl_safe(p, scratch, tmp, PMD_SHIFT - 3);
	uasm_i_andi(p, scratch, scratch, (PTRS_PER_PMD - 1) << 3);
	GET_CONTEXT(p, tmp); /* get context reg */

	if (use_lwx_insns()) {
		UASM_i_LWX(p, scratch, scratch, ptr);
	} else {
		uasm_i_daddu(p, ptr, ptr, scratch); /* add in pmd offset */
		UASM_i_LW(p, scratch, 0, ptr);
	}
#endif
	/* Adjust the context during the load latency. */
	build_adjust_context(p, tmp);

#ifdef CONFIG_HUGETLB_PAGE
	uasm_il_bbit1(p, r, scratch, ilog2(_PAGE_HUGE), label_tlb_huge_update);
	/*
	 * In the LWX case we don't want to do the load in the
	 * delay slot.  It cannot issue in the same cycle and may be
	 * speculative and unneeded.
	 */
	if (use_lwx_insns())
		uasm_i_nop(p);
#endif /* CONFIG_HUGETLB_PAGE */


	/* build_update_entries */
	if (use_lwx_insns()) {
		even = ptr;
		odd = tmp;
		UASM_i_LWX(p, even, scratch, tmp);
		UASM_i_ADDIU(p, tmp, tmp, sizeof(pte_t));
		UASM_i_LWX(p, odd, scratch, tmp);
	} else {
		UASM_i_ADDU(p, ptr, scratch, tmp); /* add in offset */
		even = tmp;
		odd = ptr;
		UASM_i_LW(p, even, 0, ptr); /* get even pte */
		UASM_i_LW(p, odd, sizeof(pte_t), ptr); /* get odd pte */
	}
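	/*
	 * Editorial note: a MIPS TLB entry maps an even/odd pair of
	 * pages, so two adjacent PTEs are loaded and fed to EntryLo0
	 * and EntryLo1 below.
	 */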
	if (kernel_uses_smartmips_rixi) {
		uasm_i_dsrl_safe(p, even, even, ilog2(_PAGE_NO_EXEC));
		uasm_i_dsrl_safe(p, odd, odd, ilog2(_PAGE_NO_EXEC));
		uasm_i_drotr(p, even, even,
			     ilog2(_PAGE_GLOBAL) - ilog2(_PAGE_NO_EXEC));
		UASM_i_MTC0(p, even, C0_ENTRYLO0); /* load it */
		uasm_i_drotr(p, odd, odd,
			     ilog2(_PAGE_GLOBAL) - ilog2(_PAGE_NO_EXEC));
	} else {
		uasm_i_dsrl_safe(p, even, even, ilog2(_PAGE_GLOBAL));
		UASM_i_MTC0(p, even, C0_ENTRYLO0); /* load it */
		uasm_i_dsrl_safe(p, odd, odd, ilog2(_PAGE_GLOBAL));
	}
	UASM_i_MTC0(p, odd, C0_ENTRYLO1); /* load it */

	if (c0_scratch >= 0) {
		UASM_i_MFC0(p, scratch, 31, c0_scratch);
		build_tlb_write_entry(p, l, r, tlb_random);
		uasm_l_leave(l, *p);
		rv.restore_scratch = 1;
	} else if (PAGE_SHIFT == 14 || PAGE_SHIFT == 13) {
		build_tlb_write_entry(p, l, r, tlb_random);
		uasm_l_leave(l, *p);
		UASM_i_LW(p, scratch, scratchpad_offset(0), 0);
	} else {
		UASM_i_LW(p, scratch, scratchpad_offset(0), 0);
		build_tlb_write_entry(p, l, r, tlb_random);
		uasm_l_leave(l, *p);
		rv.restore_scratch = 1;
	}

	uasm_i_eret(p); /* return from trap */

	return rv;
}

/*
 * For a 64-bit kernel, we are using the 64-bit XTLB refill exception
 * because EXL == 0.  If we wrap, we can also use the 32 instruction
@@ -903,12 +1149,25 @@ static void __cpuinit build_r4000_tlb_refill_handler(void)
	struct uasm_reloc *r = relocs;
	u32 *f;
	unsigned int final_len;
	struct mips_huge_tlb_info htlb_info;
	enum vmalloc64_mode vmalloc_mode;

	memset(tlb_handler, 0, sizeof(tlb_handler));
	memset(labels, 0, sizeof(labels));
	memset(relocs, 0, sizeof(relocs));
	memset(final_handler, 0, sizeof(final_handler));

	if (scratch_reg == 0)
		scratch_reg = allocate_kscratch();

	if ((scratch_reg > 0 || scratchpad_available()) && use_bbit_insns()) {
		htlb_info = build_fast_tlb_refill_handler(&p, &l, &r, K0, K1,
							  scratch_reg);
		vmalloc_mode = refill_scratch;
	} else {
		htlb_info.huge_pte = K0;
		htlb_info.restore_scratch = 0;
		vmalloc_mode = refill_noscratch;
		/*
		 * create the plain linear handler
		 */
@@ -941,16 +1200,16 @@ static void __cpuinit build_r4000_tlb_refill_handler(void)
		build_tlb_write_entry(&p, &l, &r, tlb_random);
		uasm_l_leave(&l, p);
		uasm_i_eret(&p); /* return from trap */

	}
#ifdef CONFIG_HUGETLB_PAGE
	uasm_l_tlb_huge_update(&l, p);
	UASM_i_LW(&p, K0, 0, K1);
	build_huge_update_entries(&p, K0, K1);
	build_huge_tlb_write_entry(&p, &l, &r, K0, tlb_random);
	build_huge_update_entries(&p, htlb_info.huge_pte, K1);
	build_huge_tlb_write_entry(&p, &l, &r, K0, tlb_random,
				   htlb_info.restore_scratch);
#endif

#ifdef CONFIG_64BIT
	build_get_pgd_vmalloc64(&p, &l, &r, K0, K1, refill);
	build_get_pgd_vmalloc64(&p, &l, &r, K0, K1, vmalloc_mode);
#endif

	/*
@@ -1616,7 +1875,7 @@ static void __cpuinit build_r4000_tlb_load_handler(void)
		 * We clobbered C0_PAGEMASK, restore it.  On the other branch
		 * it is restored in build_huge_tlb_write_entry.
		 */
		build_restore_pagemask(&p, &r, K0, label_nopage_tlbl);
		build_restore_pagemask(&p, &r, K0, label_nopage_tlbl, 0);

		uasm_l_tlbl_goaround2(&l, p);
	}