Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit da181a8b authored by Rusty Russell, committed by Andi Kleen
Browse files

[PATCH] paravirt: Add MMU virtualization to paravirt_ops



Add the three bare TLB accessor functions to paravirt-ops.  Most amusingly,
flush_tlb is redefined on SMP, so I can't call the paravirt op flush_tlb.
Instead, I chose to indicate the actual flush type, kernel (global) vs. user
(non-global).  Global in this sense means using the global bit in the page
table entry, which makes TLB entries persistent across CR3 reloads, not
global as in the SMP sense of invoking remote shootdowns, so the term is
confusingly overloaded.

AK: folded in fix from Zach for PAE compilation

Signed-off-by: Zachary Amsden <zach@vmware.com>
Signed-off-by: Chris Wright <chrisw@sous-sol.org>
Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
parent 13623d79
Loading
Loading
Loading
Loading
+109 −0
Original line number Diff line number Diff line
@@ -31,6 +31,7 @@
#include <asm/delay.h>
#include <asm/fixmap.h>
#include <asm/apic.h>
#include <asm/tlbflush.h>

/* nop stub */
static void native_nop(void)
@@ -379,6 +380,97 @@ static fastcall void native_io_delay(void)
	asm volatile("outb %al,$0x80");
}

/*
 * Native backend for paravirt_ops.flush_tlb_user: forwards directly to
 * __native_flush_tlb().
 */
static fastcall void native_flush_tlb(void)
{
	__native_flush_tlb();
}

/*
 * Native backend for paravirt_ops.flush_tlb_kernel: forwards directly to
 * __native_flush_tlb_global().
 *
 * Global pages have to be flushed a bit differently. Not a real
 * performance problem because this does not happen often.
 *
 * "Global" here refers to the global bit in the page table entry, which
 * makes TLB entries persist across CR3 reloads; it does not mean an
 * SMP-wide remote shootdown.
 */
static fastcall void native_flush_tlb_global(void)
{
	__native_flush_tlb_global();
}

/*
 * Native backend for paravirt_ops.flush_tlb_single: passes addr unchanged
 * to __native_flush_tlb_single().
 * NOTE(review): addr is presumably the virtual address whose TLB entry is
 * to be dropped -- confirm against __native_flush_tlb_single().
 */
static fastcall void native_flush_tlb_single(u32 addr)
{
	__native_flush_tlb_single(addr);
}

#ifndef CONFIG_X86_PAE
/*
 * Non-PAE native backend for paravirt_ops.set_pte: stores pteval into
 * *ptep with one plain assignment.
 */
static fastcall void native_set_pte(pte_t *ptep, pte_t pteval)
{
	*ptep = pteval;
}

/*
 * Non-PAE native backend for paravirt_ops.set_pte_at: stores pteval into
 * *ptep.  The mm and addr arguments are accepted to match the hook
 * signature but are not used here.
 */
static fastcall void native_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pteval)
{
	*ptep = pteval;
}

/*
 * Non-PAE native backend for paravirt_ops.set_pmd: stores pmdval into
 * *pmdp with one plain assignment.
 */
static fastcall void native_set_pmd(pmd_t *pmdp, pmd_t pmdval)
{
	*pmdp = pmdval;
}

#else /* CONFIG_X86_PAE */

/*
 * PAE native backend for paravirt_ops.set_pte.  The entry is written as
 * two separate halves: pte_high first, then pte_low, with smp_wmb()
 * ordering the two stores.
 * NOTE(review): presumably the low half is written last because it carries
 * the present bit -- confirm.
 */
static fastcall void native_set_pte(pte_t *ptep, pte_t pte)
{
	ptep->pte_high = pte.pte_high;
	smp_wmb();	/* high half must be ordered before the low half */
	ptep->pte_low = pte.pte_low;
}

/*
 * PAE native backend for paravirt_ops.set_pte_at.  Performs the same
 * high-half, barrier, low-half store sequence as the PAE native_set_pte();
 * mm and addr are accepted to match the hook signature but are not used.
 */
static fastcall void native_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pte)
{
	ptep->pte_high = pte.pte_high;
	smp_wmb();	/* high half must be ordered before the low half */
	ptep->pte_low = pte.pte_low;
}

/*
 * PAE native backend for paravirt_ops.set_pte_present.  Rewrites an entry
 * in three ordered steps: zero pte_low, store the new pte_high, then store
 * the new pte_low, with smp_wmb() between each step.  mm and addr are not
 * used here.
 * NOTE(review): zeroing the low half first presumably clears the present
 * bit before the halves are replaced -- confirm.
 */
static fastcall void native_set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte)
{
	ptep->pte_low = 0;
	smp_wmb();	/* low half is zeroed before the high half changes */
	ptep->pte_high = pte.pte_high;
	smp_wmb();	/* new high half is in place before the new low half */
	ptep->pte_low = pte.pte_low;
}

/*
 * PAE native backend for paravirt_ops.set_pte_atomic: writes the whole
 * entry through set_64bit(), treating *ptep as an unsigned long long and
 * taking the value from pte_val(pteval).
 */
static fastcall void native_set_pte_atomic(pte_t *ptep, pte_t pteval)
{
	set_64bit((unsigned long long *)ptep,pte_val(pteval));
}

/*
 * PAE native backend for paravirt_ops.set_pmd: writes the whole entry
 * through set_64bit(), treating *pmdp as an unsigned long long and taking
 * the value from pmd_val(pmdval).
 */
static fastcall void native_set_pmd(pmd_t *pmdp, pmd_t pmdval)
{
	set_64bit((unsigned long long *)pmdp,pmd_val(pmdval));
}

/*
 * PAE native backend for paravirt_ops.set_pud: stores pudval into *pudp
 * with one plain assignment.
 */
static fastcall void native_set_pud(pud_t *pudp, pud_t pudval)
{
	*pudp = pudval;
}

/*
 * PAE native backend for paravirt_ops.pte_clear.  Zeroes pte_low first and
 * pte_high second, with smp_wmb() ordering the two stores.  mm and addr
 * are accepted to match the hook signature but are not used.
 */
static fastcall void native_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
	ptep->pte_low = 0;
	smp_wmb();	/* low half is cleared before the high half */
	ptep->pte_high = 0;
}

/*
 * PAE native backend for paravirt_ops.pmd_clear.  Views the entry as two
 * 32-bit words and zeroes the first word before the second, with
 * smp_wmb() ordering the two stores.
 */
static fastcall void native_pmd_clear(pmd_t *pmd)
{
	u32 *halves = (u32 *)pmd;

	halves[0] = 0;
	smp_wmb();	/* first word is cleared before the second */
	halves[1] = 0;
}
#endif /* CONFIG_X86_PAE */

/*
 * These are in entry.S.  They are the native backends for
 * paravirt_ops.iret and paravirt_ops.irq_enable_sysexit.
 */
extern fastcall void native_iret(void);
extern fastcall void native_irq_enable_sysexit(void);
@@ -454,6 +546,23 @@ struct paravirt_ops paravirt_ops = {
	.apic_read = native_apic_read,
#endif

	.flush_tlb_user = native_flush_tlb,
	.flush_tlb_kernel = native_flush_tlb_global,
	.flush_tlb_single = native_flush_tlb_single,

	.set_pte = native_set_pte,
	.set_pte_at = native_set_pte_at,
	.set_pmd = native_set_pmd,
	.pte_update = (void *)native_nop,
	.pte_update_defer = (void *)native_nop,
#ifdef CONFIG_X86_PAE
	.set_pte_atomic = native_set_pte_atomic,
	.set_pte_present = native_set_pte_present,
	.set_pud = native_set_pud,
	.pte_clear = native_pte_clear,
	.pmd_clear = native_pmd_clear,
#endif

	.irq_enable_sysexit = native_irq_enable_sysexit,
	.iret = native_iret,
};
+1 −0
Original line number Diff line number Diff line
@@ -16,6 +16,7 @@
 */

#undef CONFIG_X86_PAE
#undef CONFIG_PARAVIRT
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
+75 −0
Original line number Diff line number Diff line
@@ -4,6 +4,7 @@
 * para-virtualization: those hooks are defined here. */
#include <linux/linkage.h>
#include <linux/stringify.h>
#include <asm/page.h>

#ifdef CONFIG_PARAVIRT
/* These are the most performance critical ops, so we want to be able to patch
@@ -27,6 +28,7 @@
struct thread_struct;
struct Xgt_desc_struct;
struct tss_struct;
struct mm_struct;
struct paravirt_ops
{
	unsigned int kernel_rpl;
@@ -121,6 +123,23 @@ struct paravirt_ops
	unsigned long (fastcall *apic_read)(unsigned long reg);
#endif

	void (fastcall *flush_tlb_user)(void);
	void (fastcall *flush_tlb_kernel)(void);
	void (fastcall *flush_tlb_single)(u32 addr);

	void (fastcall *set_pte)(pte_t *ptep, pte_t pteval);
	void (fastcall *set_pte_at)(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pteval);
	void (fastcall *set_pmd)(pmd_t *pmdp, pmd_t pmdval);
	void (fastcall *pte_update)(struct mm_struct *mm, u32 addr, pte_t *ptep);
	void (fastcall *pte_update_defer)(struct mm_struct *mm, u32 addr, pte_t *ptep);
#ifdef CONFIG_X86_PAE
	void (fastcall *set_pte_atomic)(pte_t *ptep, pte_t pteval);
	void (fastcall *set_pte_present)(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte);
	void (fastcall *set_pud)(pud_t *pudp, pud_t pudval);
	void (fastcall *pte_clear)(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
	void (fastcall *pmd_clear)(pmd_t *pmdp);
#endif

	/* These two are jmp to, not actually called. */
	void (fastcall *irq_enable_sysexit)(void);
	void (fastcall *iret)(void);
@@ -297,6 +316,62 @@ static inline unsigned long apic_read(unsigned long reg)
#endif


/*
 * Route the low-level TLB flush primitives through paravirt_ops.
 * Note the naming: __flush_tlb() maps to the flush_tlb_user hook and
 * __flush_tlb_global() maps to the flush_tlb_kernel hook.
 */
#define __flush_tlb() paravirt_ops.flush_tlb_user()
#define __flush_tlb_global() paravirt_ops.flush_tlb_kernel()
#define __flush_tlb_single(addr) paravirt_ops.flush_tlb_single(addr)

/* Forwards ptep and pteval to the paravirt_ops.set_pte hook. */
static inline void set_pte(pte_t *ptep, pte_t pteval)
{
	paravirt_ops.set_pte(ptep, pteval);
}

/* Forwards all four arguments unchanged to the paravirt_ops.set_pte_at hook. */
static inline void set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pteval)
{
	paravirt_ops.set_pte_at(mm, addr, ptep, pteval);
}

/* Forwards pmdp and pmdval to the paravirt_ops.set_pmd hook. */
static inline void set_pmd(pmd_t *pmdp, pmd_t pmdval)
{
	paravirt_ops.set_pmd(pmdp, pmdval);
}

/*
 * Forwards mm, addr and ptep to the paravirt_ops.pte_update hook.
 * NOTE(review): the native paravirt_ops table shown with this change
 * points this hook at native_nop, so natively this presumably does
 * nothing -- confirm.
 */
static inline void pte_update(struct mm_struct *mm, u32 addr, pte_t *ptep)
{
	paravirt_ops.pte_update(mm, addr, ptep);
}

/*
 * Forwards mm, addr and ptep to the paravirt_ops.pte_update_defer hook.
 * NOTE(review): the native paravirt_ops table shown with this change
 * points this hook at native_nop, so natively this presumably does
 * nothing -- confirm.
 */
static inline void pte_update_defer(struct mm_struct *mm, u32 addr, pte_t *ptep)
{
	paravirt_ops.pte_update_defer(mm, addr, ptep);
}

#ifdef CONFIG_X86_PAE
/* PAE only: forwards ptep and pteval to the paravirt_ops.set_pte_atomic hook. */
static inline void set_pte_atomic(pte_t *ptep, pte_t pteval)
{
	paravirt_ops.set_pte_atomic(ptep, pteval);
}

/* PAE only: forwards all four arguments to the paravirt_ops.set_pte_present hook. */
static inline void set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte)
{
	paravirt_ops.set_pte_present(mm, addr, ptep, pte);
}

/* PAE only: forwards pudp and pudval to the paravirt_ops.set_pud hook. */
static inline void set_pud(pud_t *pudp, pud_t pudval)
{
	paravirt_ops.set_pud(pudp, pudval);
}

/* PAE only: forwards mm, addr and ptep to the paravirt_ops.pte_clear hook. */
static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
	paravirt_ops.pte_clear(mm, addr, ptep);
}

/* PAE only: forwards pmdp to the paravirt_ops.pmd_clear hook. */
static inline void pmd_clear(pmd_t *pmdp)
{
	paravirt_ops.pmd_clear(pmdp);
}
#endif

/* These all sit in the .parainstructions section to tell us what to patch. */
struct paravirt_patch {
	u8 *instr; 		/* original instructions */
+4 −1
Original line number Diff line number Diff line
@@ -13,11 +13,14 @@
 * within a page table are directly modified.  Thus, the following
 * hook is made available.
 */
/*
 * Direct page-table stores.  set_pte, set_pte_at and set_pmd are only
 * defined here as macros when CONFIG_PARAVIRT is off; with CONFIG_PARAVIRT
 * they must stay undefined at this point so the paravirt_ops-backed
 * versions are the ones callers reach.  set_pmd therefore must not be
 * redefined outside this guard.
 */
#ifndef CONFIG_PARAVIRT
#define set_pte(pteptr, pteval) (*(pteptr) = (pteval))
#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval)
#define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval))
#endif

/*
 * With two-level page tables these variants need no special handling
 * and simply expand to set_pte() / set_pte_at().
 */
#define set_pte_atomic(pteptr, pteval) set_pte(pteptr,pteval)
#define set_pte_present(mm,addr,ptep,pteval) set_pte_at(mm,addr,ptep,pteval)

/* Clearing an entry is a store of the zero entry via set_pte_at()/set_pmd(). */
#define pte_clear(mm,addr,xp)	do { set_pte_at(mm, addr, xp, __pte(0)); } while (0)
#define pmd_clear(xp)	do { set_pmd(xp, __pmd(0)); } while (0)
+21 −19
Original line number Diff line number Diff line
@@ -44,6 +44,7 @@ static inline int pte_exec_kernel(pte_t pte)
	return pte_x(pte);
}

#ifndef CONFIG_PARAVIRT
/* Rules for using set_pte: the pte being assigned *must* be
 * either not present or in a state where the hardware will
 * not attempt to update the pte.  In places where this is
@@ -80,25 +81,6 @@ static inline void set_pte_present(struct mm_struct *mm, unsigned long addr, pte
/* Stores pudval into *pudptr with a plain assignment. */
#define set_pud(pudptr,pudval) \
		(*(pudptr) = (pudval))

/*
 * NOTE(review): an identical copy of this group (pud_clear, pud_page,
 * pud_page_vaddr, pmd_offset) also appears after the #endif that closes
 * the !CONFIG_PARAVIRT section.  If both copies are live, pud_clear() is
 * defined twice -- confirm that only one copy is meant to remain.
 */

/*
 * Pentium-II erratum A13: in PAE mode we explicitly have to flush
 * the TLB via cr3 if the top-level pgd is changed...
 * We do not let the generic code free and clear pgd entries due to
 * this erratum.
 */
/* Deliberately empty: clearing a pud is a no-op here (see erratum note above). */
static inline void pud_clear (pud_t * pud) { }

/* Masks the pud value with PAGE_MASK, converts it with __va(), casts to struct page *. */
#define pud_page(pud) \
((struct page *) __va(pud_val(pud) & PAGE_MASK))

/* Same computation as pud_page(), but yields an unsigned long address. */
#define pud_page_vaddr(pud) \
((unsigned long) __va(pud_val(pud) & PAGE_MASK))


/* Find an entry in the second-level page table.. */
#define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \
			pmd_index(address))

/*
 * For PTEs and PDEs, we must clear the P-bit first when clearing a page table
 * entry, so clear the bottom half first and enforce ordering with a compiler
@@ -118,6 +100,26 @@ static inline void pmd_clear(pmd_t *pmd)
	smp_wmb();
	*(tmp + 1) = 0;
}
#endif

/*
 * Pentium-II erratum A13: in PAE mode we explicitly have to flush
 * the TLB via cr3 if the top-level pgd is changed...
 * We do not let the generic code free and clear pgd entries due to
 * this erratum.
 */
/* Deliberately empty: clearing a pud is a no-op here (see erratum note above). */
static inline void pud_clear (pud_t * pud) { }

/* Masks the pud value with PAGE_MASK, converts it with __va(), casts to struct page *. */
#define pud_page(pud) \
((struct page *) __va(pud_val(pud) & PAGE_MASK))

/* Same computation as pud_page(), but yields an unsigned long address. */
#define pud_page_vaddr(pud) \
((unsigned long) __va(pud_val(pud) & PAGE_MASK))


/*
 * Find an entry in the second-level page table: index the pmd_t array
 * obtained from pud_page(*(pud)) by pmd_index(address).
 */
#define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \
			pmd_index(address))

#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
Loading