Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 6cff64b8 authored by Dave Hansen's avatar Dave Hansen Committed by Ingo Molnar
Browse files

x86/mm: Use INVPCID for __native_flush_tlb_single()

This uses INVPCID to shoot down individual lines of the user mapping
instead of marking the entire user map as invalid. This
could/might/possibly be faster.

This for sure needs tlb_single_page_flush_ceiling to be redetermined;
esp. since INVPCID is _slow_.

A detailed performance analysis is available here:

  https://lkml.kernel.org/r/3062e486-3539-8a1f-5724-16199420be71@intel.com



[ Peterz: Split out from big combo patch ]

Signed-off-by: default avatarDave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: default avatarPeter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: default avatarThomas Gleixner <tglx@linutronix.de>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Eduardo Valentin <eduval@amazon.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will.deacon@arm.com>
Cc: aliguori@amazon.com
Cc: daniel.gruss@iaik.tugraz.at
Cc: hughd@google.com
Cc: keescook@google.com
Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
parent 21e94459
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -197,6 +197,7 @@
#define X86_FEATURE_CAT_L3		( 7*32+ 4) /* Cache Allocation Technology L3 */
#define X86_FEATURE_CAT_L2		( 7*32+ 5) /* Cache Allocation Technology L2 */
#define X86_FEATURE_CDP_L3		( 7*32+ 6) /* Code and Data Prioritization L3 */
#define X86_FEATURE_INVPCID_SINGLE	( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */

#define X86_FEATURE_HW_PSTATE		( 7*32+ 8) /* AMD HW-PState */
#define X86_FEATURE_PROC_FEEDBACK	( 7*32+ 9) /* AMD ProcFeedbackInterface */
+22 −1
Original line number Diff line number Diff line
@@ -85,6 +85,18 @@ static inline u16 kern_pcid(u16 asid)
	return asid + 1;
}

/*
 * The user PCID is just the kernel one, plus the "switch bit".
 */
static inline u16 user_pcid(u16 asid)
{
	u16 ret = kern_pcid(asid);
#ifdef CONFIG_PAGE_TABLE_ISOLATION
	ret |= 1 << X86_CR3_PTI_SWITCH_BIT;
#endif
	return ret;
}

struct pgd_t;
static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
{
@@ -335,6 +347,8 @@ static inline void __native_flush_tlb_global(void)
		/*
		 * Using INVPCID is considerably faster than a pair of writes
		 * to CR4 sandwiched inside an IRQ flag save/restore.
		 *
		 * Note, this works with CR4.PCIDE=0 or 1.
		 */
		invpcid_flush_all();
		return;
@@ -368,7 +382,14 @@ static inline void __native_flush_tlb_single(unsigned long addr)
	if (!static_cpu_has(X86_FEATURE_PTI))
		return;

	/*
	 * Some platforms #GP if we call invpcid(type=1/2) before CR4.PCIDE=1.
	 * Just use invalidate_user_asid() in case we are called early.
	 */
	if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE))
		invalidate_user_asid(loaded_mm_asid);
	else
		invpcid_flush_one(user_pcid(loaded_mm_asid), addr);
}

/*
+37 −27
Original line number Diff line number Diff line
@@ -203,35 +203,45 @@ static void __init probe_page_size_mask(void)

static void setup_pcid(void)
{
#ifdef CONFIG_X86_64
	if (boot_cpu_has(X86_FEATURE_PCID)) {
	if (!IS_ENABLED(CONFIG_X86_64))
		return;

	if (!boot_cpu_has(X86_FEATURE_PCID))
		return;

	if (boot_cpu_has(X86_FEATURE_PGE)) {
		/*
			 * This can't be cr4_set_bits_and_update_boot() --
			 * the trampoline code can't handle CR4.PCIDE and
			 * it wouldn't do any good anyway.  Despite the name,
			 * cr4_set_bits_and_update_boot() doesn't actually
			 * cause the bits in question to remain set all the
			 * way through the secondary boot asm.
		 * This can't be cr4_set_bits_and_update_boot() -- the
		 * trampoline code can't handle CR4.PCIDE and it wouldn't
		 * do any good anyway.  Despite the name,
		 * cr4_set_bits_and_update_boot() doesn't actually cause
		 * the bits in question to remain set all the way through
		 * the secondary boot asm.
		 *
			 * Instead, we brute-force it and set CR4.PCIDE
			 * manually in start_secondary().
		 * Instead, we brute-force it and set CR4.PCIDE manually in
		 * start_secondary().
		 */
		cr4_set_bits(X86_CR4_PCIDE);

		/*
		 * INVPCID's single-context modes (2/3) only work if we set
		 * X86_CR4_PCIDE, *and* we INVPCID support.  It's unusable
		 * on systems that have X86_CR4_PCIDE clear, or that have
		 * no INVPCID support at all.
		 */
		if (boot_cpu_has(X86_FEATURE_INVPCID))
			setup_force_cpu_cap(X86_FEATURE_INVPCID_SINGLE);
	} else {
		/*
			 * flush_tlb_all(), as currently implemented, won't
			 * work if PCID is on but PGE is not.  Since that
			 * combination doesn't exist on real hardware, there's
			 * no reason to try to fully support it, but it's
			 * polite to avoid corrupting data if we're on
			 * an improperly configured VM.
		 * flush_tlb_all(), as currently implemented, won't work if
		 * PCID is on but PGE is not.  Since that combination
		 * doesn't exist on real hardware, there's no reason to try
		 * to fully support it, but it's polite to avoid corrupting
		 * data if we're on an improperly configured VM.
		 */
		setup_clear_cpu_cap(X86_FEATURE_PCID);
	}
}
#endif
}

#ifdef CONFIG_X86_32
#define NR_RANGE_MR 3