Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit eac34119 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull x86 PTI updates from Thomas Gleixner:
 "The Speck brigade sadly provides yet another large set of patches
  destroying the perfomance which we carefully built and preserved

   - PTI support for 32bit PAE. The missing counter part to the 64bit
     PTI code implemented by Joerg.

   - A set of fixes for the Global Bit mechanics for non PCID CPUs which
     were setting the Global Bit too widely and therefore possibly
     exposing interesting memory needlessly.

   - Protection against userspace-userspace SpectreRSB

   - Support for the upcoming Enhanced IBRS mode, which is preferred
     over IBRS. Unfortunately we dont know the performance impact of
     this, but it's expected to be less horrible than the IBRS
     hammering.

   - Cleanups and simplifications"

* 'x86/pti' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (60 commits)
  x86/mm/pti: Move user W+X check into pti_finalize()
  x86/relocs: Add __end_rodata_aligned to S_REL
  x86/mm/pti: Clone kernel-image on PTE level for 32 bit
  x86/mm/pti: Don't clear permissions in pti_clone_pmd()
  x86/mm/pti: Fix 32 bit PCID check
  x86/mm/init: Remove freed kernel image areas from alias mapping
  x86/mm/init: Add helper for freeing kernel image pages
  x86/mm/init: Pass unconverted symbol addresses to free_init_pages()
  mm: Allow non-direct-map arguments to free_reserved_area()
  x86/mm/pti: Clear Global bit more aggressively
  x86/speculation: Support Enhanced IBRS on future CPUs
  x86/speculation: Protect against userspace-userspace spectreRSB
  x86/kexec: Allocate 8k PGDs for PTI
  Revert "perf/core: Make sure the ring-buffer is mapped in all page-tables"
  x86/mm: Remove in_nmi() warning from vmalloc_fault()
  x86/entry/32: Check for VM86 mode in slow-path check
  perf/core: Make sure the ring-buffer is mapped in all page-tables
  x86/pti: Check the return value of pti_user_pagetable_walk_pmd()
  x86/pti: Check the return value of pti_user_pagetable_walk_p4d()
  x86/entry/32: Add debug code to check entry/exit CR3
  ...
parents d191c82d d878efce
Loading
Loading
Loading
Loading
+522 −110
Original line number Diff line number Diff line
@@ -65,7 +65,7 @@
# define preempt_stop(clobbers)	DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
#else
# define preempt_stop(clobbers)
# define resume_kernel		restore_all
# define resume_kernel		restore_all_kernel
#endif

.macro TRACE_IRQS_IRET
@@ -77,6 +77,8 @@
#endif
.endm

#define PTI_SWITCH_MASK         (1 << PAGE_SHIFT)

/*
 * User gs save/restore
 *
@@ -154,7 +156,52 @@

#endif /* CONFIG_X86_32_LAZY_GS */

.macro SAVE_ALL pt_regs_ax=%eax
/* Unconditionally switch to user cr3 */
.macro SWITCH_TO_USER_CR3 scratch_reg:req
	ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI

	movl	%cr3, \scratch_reg
	orl	$PTI_SWITCH_MASK, \scratch_reg
	movl	\scratch_reg, %cr3
.Lend_\@:
.endm

.macro BUG_IF_WRONG_CR3 no_user_check=0
#ifdef CONFIG_DEBUG_ENTRY
	ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
	.if \no_user_check == 0
	/* coming from usermode? */
	testl	$SEGMENT_RPL_MASK, PT_CS(%esp)
	jz	.Lend_\@
	.endif
	/* On user-cr3? */
	movl	%cr3, %eax
	testl	$PTI_SWITCH_MASK, %eax
	jnz	.Lend_\@
	/* From userspace with kernel cr3 - BUG */
	ud2
.Lend_\@:
#endif
.endm

/*
 * Switch to kernel cr3 if not already loaded and return current cr3 in
 * \scratch_reg
 */
.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
	ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
	movl	%cr3, \scratch_reg
	/* Test if we are already on kernel CR3 */
	testl	$PTI_SWITCH_MASK, \scratch_reg
	jz	.Lend_\@
	andl	$(~PTI_SWITCH_MASK), \scratch_reg
	movl	\scratch_reg, %cr3
	/* Return original CR3 in \scratch_reg */
	orl	$PTI_SWITCH_MASK, \scratch_reg
.Lend_\@:
.endm

.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0
	cld
	PUSH_GS
	pushl	%fs
@@ -173,6 +220,29 @@
	movl	$(__KERNEL_PERCPU), %edx
	movl	%edx, %fs
	SET_KERNEL_GS %edx

	/* Switch to kernel stack if necessary */
.if \switch_stacks > 0
	SWITCH_TO_KERNEL_STACK
.endif

.endm

.macro SAVE_ALL_NMI cr3_reg:req
	SAVE_ALL

	BUG_IF_WRONG_CR3

	/*
	 * Now switch the CR3 when PTI is enabled.
	 *
	 * We can enter with either user or kernel cr3, the code will
	 * store the old cr3 in \cr3_reg and switches to the kernel cr3
	 * if necessary.
	 */
	SWITCH_TO_KERNEL_CR3 scratch_reg=\cr3_reg

.Lend_\@:
.endm

/*
@@ -221,6 +291,349 @@
	POP_GS_EX
.endm

.macro RESTORE_ALL_NMI cr3_reg:req pop=0
	/*
	 * Now switch the CR3 when PTI is enabled.
	 *
	 * We enter with kernel cr3 and switch the cr3 to the value
	 * stored on \cr3_reg, which is either a user or a kernel cr3.
	 */
	ALTERNATIVE "jmp .Lswitched_\@", "", X86_FEATURE_PTI

	testl	$PTI_SWITCH_MASK, \cr3_reg
	jz	.Lswitched_\@

	/* User cr3 in \cr3_reg - write it to hardware cr3 */
	movl	\cr3_reg, %cr3

.Lswitched_\@:

	BUG_IF_WRONG_CR3

	RESTORE_REGS pop=\pop
.endm

.macro CHECK_AND_APPLY_ESPFIX
#ifdef CONFIG_X86_ESPFIX32
#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + (GDT_ENTRY_ESPFIX_SS * 8)

	ALTERNATIVE	"jmp .Lend_\@", "", X86_BUG_ESPFIX

	movl	PT_EFLAGS(%esp), %eax		# mix EFLAGS, SS and CS
	/*
	 * Warning: PT_OLDSS(%esp) contains the wrong/random values if we
	 * are returning to the kernel.
	 * See comments in process.c:copy_thread() for details.
	 */
	movb	PT_OLDSS(%esp), %ah
	movb	PT_CS(%esp), %al
	andl	$(X86_EFLAGS_VM | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
	cmpl	$((SEGMENT_LDT << 8) | USER_RPL), %eax
	jne	.Lend_\@	# returning to user-space with LDT SS

	/*
	 * Setup and switch to ESPFIX stack
	 *
	 * We're returning to userspace with a 16 bit stack. The CPU will not
	 * restore the high word of ESP for us on executing iret... This is an
	 * "official" bug of all the x86-compatible CPUs, which we can work
	 * around to make dosemu and wine happy. We do this by preloading the
	 * high word of ESP with the high word of the userspace ESP while
	 * compensating for the offset by changing to the ESPFIX segment with
	 * a base address that matches for the difference.
	 */
	mov	%esp, %edx			/* load kernel esp */
	mov	PT_OLDESP(%esp), %eax		/* load userspace esp */
	mov	%dx, %ax			/* eax: new kernel esp */
	sub	%eax, %edx			/* offset (low word is 0) */
	shr	$16, %edx
	mov	%dl, GDT_ESPFIX_SS + 4		/* bits 16..23 */
	mov	%dh, GDT_ESPFIX_SS + 7		/* bits 24..31 */
	pushl	$__ESPFIX_SS
	pushl	%eax				/* new kernel esp */
	/*
	 * Disable interrupts, but do not irqtrace this section: we
	 * will soon execute iret and the tracer was already set to
	 * the irqstate after the IRET:
	 */
	DISABLE_INTERRUPTS(CLBR_ANY)
	lss	(%esp), %esp			/* switch to espfix segment */
.Lend_\@:
#endif /* CONFIG_X86_ESPFIX32 */
.endm

/*
 * Called with pt_regs fully populated and kernel segments loaded,
 * so we can access PER_CPU and use the integer registers.
 *
 * We need to be very careful here with the %esp switch, because an NMI
 * can happen everywhere. If the NMI handler finds itself on the
 * entry-stack, it will overwrite the task-stack and everything we
 * copied there. So allocate the stack-frame on the task-stack and
 * switch to it before we do any copying.
 */

#define CS_FROM_ENTRY_STACK	(1 << 31)
#define CS_FROM_USER_CR3	(1 << 30)

.macro SWITCH_TO_KERNEL_STACK

	ALTERNATIVE     "", "jmp .Lend_\@", X86_FEATURE_XENPV

	BUG_IF_WRONG_CR3

	SWITCH_TO_KERNEL_CR3 scratch_reg=%eax

	/*
	 * %eax now contains the entry cr3 and we carry it forward in
	 * that register for the time this macro runs
	 */

	/* Are we on the entry stack? Bail out if not! */
	movl	PER_CPU_VAR(cpu_entry_area), %ecx
	addl	$CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
	subl	%esp, %ecx	/* ecx = (end of entry_stack) - esp */
	cmpl	$SIZEOF_entry_stack, %ecx
	jae	.Lend_\@

	/* Load stack pointer into %esi and %edi */
	movl	%esp, %esi
	movl	%esi, %edi

	/* Move %edi to the top of the entry stack */
	andl	$(MASK_entry_stack), %edi
	addl	$(SIZEOF_entry_stack), %edi

	/* Load top of task-stack into %edi */
	movl	TSS_entry2task_stack(%edi), %edi

	/*
	 * Clear unused upper bits of the dword containing the word-sized CS
	 * slot in pt_regs in case hardware didn't clear it for us.
	 */
	andl	$(0x0000ffff), PT_CS(%esp)

	/* Special case - entry from kernel mode via entry stack */
#ifdef CONFIG_VM86
	movl	PT_EFLAGS(%esp), %ecx		# mix EFLAGS and CS
	movb	PT_CS(%esp), %cl
	andl	$(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %ecx
#else
	movl	PT_CS(%esp), %ecx
	andl	$SEGMENT_RPL_MASK, %ecx
#endif
	cmpl	$USER_RPL, %ecx
	jb	.Lentry_from_kernel_\@

	/* Bytes to copy */
	movl	$PTREGS_SIZE, %ecx

#ifdef CONFIG_VM86
	testl	$X86_EFLAGS_VM, PT_EFLAGS(%esi)
	jz	.Lcopy_pt_regs_\@

	/*
	 * Stack-frame contains 4 additional segment registers when
	 * coming from VM86 mode
	 */
	addl	$(4 * 4), %ecx

#endif
.Lcopy_pt_regs_\@:

	/* Allocate frame on task-stack */
	subl	%ecx, %edi

	/* Switch to task-stack */
	movl	%edi, %esp

	/*
	 * We are now on the task-stack and can safely copy over the
	 * stack-frame
	 */
	shrl	$2, %ecx
	cld
	rep movsl

	jmp .Lend_\@

.Lentry_from_kernel_\@:

	/*
	 * This handles the case when we enter the kernel from
	 * kernel-mode and %esp points to the entry-stack. When this
	 * happens we need to switch to the task-stack to run C code,
	 * but switch back to the entry-stack again when we approach
	 * iret and return to the interrupted code-path. This usually
	 * happens when we hit an exception while restoring user-space
	 * segment registers on the way back to user-space or when the
	 * sysenter handler runs with eflags.tf set.
	 *
	 * When we switch to the task-stack here, we can't trust the
	 * contents of the entry-stack anymore, as the exception handler
	 * might be scheduled out or moved to another CPU. Therefore we
	 * copy the complete entry-stack to the task-stack and set a
	 * marker in the iret-frame (bit 31 of the CS dword) to detect
	 * what we've done on the iret path.
	 *
	 * On the iret path we copy everything back and switch to the
	 * entry-stack, so that the interrupted kernel code-path
	 * continues on the same stack it was interrupted with.
	 *
	 * Be aware that an NMI can happen anytime in this code.
	 *
	 * %esi: Entry-Stack pointer (same as %esp)
	 * %edi: Top of the task stack
	 * %eax: CR3 on kernel entry
	 */

	/* Calculate number of bytes on the entry stack in %ecx */
	movl	%esi, %ecx

	/* %ecx to the top of entry-stack */
	andl	$(MASK_entry_stack), %ecx
	addl	$(SIZEOF_entry_stack), %ecx

	/* Number of bytes on the entry stack to %ecx */
	sub	%esi, %ecx

	/* Mark stackframe as coming from entry stack */
	orl	$CS_FROM_ENTRY_STACK, PT_CS(%esp)

	/*
	 * Test the cr3 used to enter the kernel and add a marker
	 * so that we can switch back to it before iret.
	 */
	testl	$PTI_SWITCH_MASK, %eax
	jz	.Lcopy_pt_regs_\@
	orl	$CS_FROM_USER_CR3, PT_CS(%esp)

	/*
	 * %esi and %edi are unchanged, %ecx contains the number of
	 * bytes to copy. The code at .Lcopy_pt_regs_\@ will allocate
	 * the stack-frame on task-stack and copy everything over
	 */
	jmp .Lcopy_pt_regs_\@

.Lend_\@:
.endm

/*
 * Switch back from the kernel stack to the entry stack.
 *
 * The %esp register must point to pt_regs on the task stack. It will
 * first calculate the size of the stack-frame to copy, depending on
 * whether we return to VM86 mode or not. With that it uses 'rep movsl'
 * to copy the contents of the stack over to the entry stack.
 *
 * We must be very careful here, as we can't trust the contents of the
 * task-stack once we switched to the entry-stack. When an NMI happens
 * while on the entry-stack, the NMI handler will switch back to the top
 * of the task stack, overwriting our stack-frame we are about to copy.
 * Therefore we switch the stack only after everything is copied over.
 */
.macro SWITCH_TO_ENTRY_STACK

	ALTERNATIVE     "", "jmp .Lend_\@", X86_FEATURE_XENPV

	/* Bytes to copy */
	movl	$PTREGS_SIZE, %ecx

#ifdef CONFIG_VM86
	testl	$(X86_EFLAGS_VM), PT_EFLAGS(%esp)
	jz	.Lcopy_pt_regs_\@

	/* Additional 4 registers to copy when returning to VM86 mode */
	addl    $(4 * 4), %ecx

.Lcopy_pt_regs_\@:
#endif

	/* Initialize source and destination for movsl */
	movl	PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %edi
	subl	%ecx, %edi
	movl	%esp, %esi

	/* Save future stack pointer in %ebx */
	movl	%edi, %ebx

	/* Copy over the stack-frame */
	shrl	$2, %ecx
	cld
	rep movsl

	/*
	 * Switch to entry-stack - needs to happen after everything is
	 * copied because the NMI handler will overwrite the task-stack
	 * when on entry-stack
	 */
	movl	%ebx, %esp

.Lend_\@:
.endm

/*
 * This macro handles the case when we return to kernel-mode on the iret
 * path and have to switch back to the entry stack and/or user-cr3
 *
 * See the comments below the .Lentry_from_kernel_\@ label in the
 * SWITCH_TO_KERNEL_STACK macro for more details.
 */
.macro PARANOID_EXIT_TO_KERNEL_MODE

	/*
	 * Test if we entered the kernel with the entry-stack. Most
	 * likely we did not, because this code only runs on the
	 * return-to-kernel path.
	 */
	testl	$CS_FROM_ENTRY_STACK, PT_CS(%esp)
	jz	.Lend_\@

	/* Unlikely slow-path */

	/* Clear marker from stack-frame */
	andl	$(~CS_FROM_ENTRY_STACK), PT_CS(%esp)

	/* Copy the remaining task-stack contents to entry-stack */
	movl	%esp, %esi
	movl	PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %edi

	/* Bytes on the task-stack to ecx */
	movl	PER_CPU_VAR(cpu_tss_rw + TSS_sp1), %ecx
	subl	%esi, %ecx

	/* Allocate stack-frame on entry-stack */
	subl	%ecx, %edi

	/*
	 * Save future stack-pointer, we must not switch until the
	 * copy is done, otherwise the NMI handler could destroy the
	 * contents of the task-stack we are about to copy.
	 */
	movl	%edi, %ebx

	/* Do the copy */
	shrl	$2, %ecx
	cld
	rep movsl

	/* Safe to switch to entry-stack now */
	movl	%ebx, %esp

	/*
	 * We came from entry-stack and need to check if we also need to
	 * switch back to user cr3.
	 */
	testl	$CS_FROM_USER_CR3, PT_CS(%esp)
	jz	.Lend_\@

	/* Clear marker from stack-frame */
	andl	$(~CS_FROM_USER_CR3), PT_CS(%esp)

	SWITCH_TO_USER_CR3 scratch_reg=%eax

.Lend_\@:
.endm
/*
 * %eax: prev task
 * %edx: next task
@@ -351,9 +764,9 @@ ENTRY(resume_kernel)
	DISABLE_INTERRUPTS(CLBR_ANY)
.Lneed_resched:
	cmpl	$0, PER_CPU_VAR(__preempt_count)
	jnz	restore_all
	jnz	restore_all_kernel
	testl	$X86_EFLAGS_IF, PT_EFLAGS(%esp)	# interrupts off (exception path) ?
	jz	restore_all
	jz	restore_all_kernel
	call	preempt_schedule_irq
	jmp	.Lneed_resched
END(resume_kernel)
@@ -412,7 +825,21 @@ ENTRY(xen_sysenter_target)
 * 0(%ebp) arg6
 */
ENTRY(entry_SYSENTER_32)
	movl	TSS_sysenter_sp0(%esp), %esp
	/*
	 * On entry-stack with all userspace-regs live - save and
	 * restore eflags and %eax to use it as scratch-reg for the cr3
	 * switch.
	 */
	pushfl
	pushl	%eax
	BUG_IF_WRONG_CR3 no_user_check=1
	SWITCH_TO_KERNEL_CR3 scratch_reg=%eax
	popl	%eax
	popfl

	/* Stack empty again, switch to task stack */
	movl	TSS_entry2task_stack(%esp), %esp

.Lsysenter_past_esp:
	pushl	$__USER_DS		/* pt_regs->ss */
	pushl	%ebp			/* pt_regs->sp (stashed in bp) */
@@ -421,7 +848,7 @@ ENTRY(entry_SYSENTER_32)
	pushl	$__USER_CS		/* pt_regs->cs */
	pushl	$0			/* pt_regs->ip = 0 (placeholder) */
	pushl	%eax			/* pt_regs->orig_ax */
	SAVE_ALL pt_regs_ax=$-ENOSYS	/* save rest */
	SAVE_ALL pt_regs_ax=$-ENOSYS	/* save rest, stack already switched */

	/*
	 * SYSENTER doesn't filter flags, so we need to clear NT, AC
@@ -460,25 +887,49 @@ ENTRY(entry_SYSENTER_32)

/* Opportunistic SYSEXIT */
	TRACE_IRQS_ON			/* User mode traces as IRQs on. */

	/*
	 * Setup entry stack - we keep the pointer in %eax and do the
	 * switch after almost all user-state is restored.
	 */

	/* Load entry stack pointer and allocate frame for eflags/eax */
	movl	PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %eax
	subl	$(2*4), %eax

	/* Copy eflags and eax to entry stack */
	movl	PT_EFLAGS(%esp), %edi
	movl	PT_EAX(%esp), %esi
	movl	%edi, (%eax)
	movl	%esi, 4(%eax)

	/* Restore user registers and segments */
	movl	PT_EIP(%esp), %edx	/* pt_regs->ip */
	movl	PT_OLDESP(%esp), %ecx	/* pt_regs->sp */
1:	mov	PT_FS(%esp), %fs
	PTGS_TO_GS

	popl	%ebx			/* pt_regs->bx */
	addl	$2*4, %esp		/* skip pt_regs->cx and pt_regs->dx */
	popl	%esi			/* pt_regs->si */
	popl	%edi			/* pt_regs->di */
	popl	%ebp			/* pt_regs->bp */
	popl	%eax			/* pt_regs->ax */

	/* Switch to entry stack */
	movl	%eax, %esp

	/* Now ready to switch the cr3 */
	SWITCH_TO_USER_CR3 scratch_reg=%eax

	/*
	 * Restore all flags except IF. (We restore IF separately because
	 * STI gives a one-instruction window in which we won't be interrupted,
	 * whereas POPF does not.)
	 */
	addl	$PT_EFLAGS-PT_DS, %esp	/* point esp at pt_regs->flags */
	btrl	$X86_EFLAGS_IF_BIT, (%esp)
	BUG_IF_WRONG_CR3 no_user_check=1
	popfl
	popl	%eax

	/*
	 * Return back to the vDSO, which will pop ecx and edx.
@@ -532,7 +983,8 @@ ENDPROC(entry_SYSENTER_32)
ENTRY(entry_INT80_32)
	ASM_CLAC
	pushl	%eax			/* pt_regs->orig_ax */
	SAVE_ALL pt_regs_ax=$-ENOSYS	/* save rest */

	SAVE_ALL pt_regs_ax=$-ENOSYS switch_stacks=1	/* save rest */

	/*
	 * User mode is traced as though IRQs are on, and the interrupt gate
@@ -546,24 +998,17 @@ ENTRY(entry_INT80_32)

restore_all:
	TRACE_IRQS_IRET
	SWITCH_TO_ENTRY_STACK
.Lrestore_all_notrace:
#ifdef CONFIG_X86_ESPFIX32
	ALTERNATIVE	"jmp .Lrestore_nocheck", "", X86_BUG_ESPFIX

	movl	PT_EFLAGS(%esp), %eax		# mix EFLAGS, SS and CS
	/*
	 * Warning: PT_OLDSS(%esp) contains the wrong/random values if we
	 * are returning to the kernel.
	 * See comments in process.c:copy_thread() for details.
	 */
	movb	PT_OLDSS(%esp), %ah
	movb	PT_CS(%esp), %al
	andl	$(X86_EFLAGS_VM | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
	cmpl	$((SEGMENT_LDT << 8) | USER_RPL), %eax
	je .Lldt_ss				# returning to user-space with LDT SS
#endif
	CHECK_AND_APPLY_ESPFIX
.Lrestore_nocheck:
	RESTORE_REGS 4				# skip orig_eax/error_code
	/* Switch back to user CR3 */
	SWITCH_TO_USER_CR3 scratch_reg=%eax

	BUG_IF_WRONG_CR3

	/* Restore user state */
	RESTORE_REGS pop=4			# skip orig_eax/error_code
.Lirq_return:
	/*
	 * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization
@@ -572,46 +1017,33 @@ restore_all:
	 */
	INTERRUPT_RETURN

restore_all_kernel:
	TRACE_IRQS_IRET
	PARANOID_EXIT_TO_KERNEL_MODE
	BUG_IF_WRONG_CR3
	RESTORE_REGS 4
	jmp	.Lirq_return

.section .fixup, "ax"
ENTRY(iret_exc	)
	pushl	$0				# no error code
	pushl	$do_iret_error
	jmp	common_exception
.previous
	_ASM_EXTABLE(.Lirq_return, iret_exc)

#ifdef CONFIG_X86_ESPFIX32
.Lldt_ss:
/*
 * Setup and switch to ESPFIX stack
 *
 * We're returning to userspace with a 16 bit stack. The CPU will not
 * restore the high word of ESP for us on executing iret... This is an
 * "official" bug of all the x86-compatible CPUs, which we can work
 * around to make dosemu and wine happy. We do this by preloading the
 * high word of ESP with the high word of the userspace ESP while
 * compensating for the offset by changing to the ESPFIX segment with
 * a base address that matches for the difference.
 */
#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + (GDT_ENTRY_ESPFIX_SS * 8)
	mov	%esp, %edx			/* load kernel esp */
	mov	PT_OLDESP(%esp), %eax		/* load userspace esp */
	mov	%dx, %ax			/* eax: new kernel esp */
	sub	%eax, %edx			/* offset (low word is 0) */
	shr	$16, %edx
	mov	%dl, GDT_ESPFIX_SS + 4		/* bits 16..23 */
	mov	%dh, GDT_ESPFIX_SS + 7		/* bits 24..31 */
	pushl	$__ESPFIX_SS
	pushl	%eax				/* new kernel esp */
#ifdef CONFIG_DEBUG_ENTRY
	/*
	 * Disable interrupts, but do not irqtrace this section: we
	 * will soon execute iret and the tracer was already set to
	 * the irqstate after the IRET:
	 * The stack-frame here is the one that iret faulted on, so its a
	 * return-to-user frame. We are on kernel-cr3 because we come here from
	 * the fixup code. This confuses the CR3 checker, so switch to user-cr3
	 * as the checker expects it.
	 */
	DISABLE_INTERRUPTS(CLBR_ANY)
	lss	(%esp), %esp			/* switch to espfix segment */
	jmp	.Lrestore_nocheck
	pushl	%eax
	SWITCH_TO_USER_CR3 scratch_reg=%eax
	popl	%eax
#endif

	jmp	common_exception
.previous
	_ASM_EXTABLE(.Lirq_return, iret_exc)
ENDPROC(entry_INT80_32)

.macro FIXUP_ESPFIX_STACK
@@ -671,7 +1103,8 @@ END(irq_entries_start)
common_interrupt:
	ASM_CLAC
	addl	$-0x80, (%esp)			/* Adjust vector into the [-256, -1] range */
	SAVE_ALL

	SAVE_ALL switch_stacks=1
	ENCODE_FRAME_POINTER
	TRACE_IRQS_OFF
	movl	%esp, %eax
@@ -683,7 +1116,7 @@ ENDPROC(common_interrupt)
ENTRY(name)						\
	ASM_CLAC;					\
	pushl	$~(nr);					\
	SAVE_ALL;			\
	SAVE_ALL switch_stacks=1;			\
	ENCODE_FRAME_POINTER;				\
	TRACE_IRQS_OFF					\
	movl	%esp, %eax;				\
@@ -920,16 +1353,20 @@ common_exception:
	pushl	%es
	pushl	%ds
	pushl	%eax
	movl	$(__USER_DS), %eax
	movl	%eax, %ds
	movl	%eax, %es
	movl	$(__KERNEL_PERCPU), %eax
	movl	%eax, %fs
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	pushl	%edx
	pushl	%ecx
	pushl	%ebx
	SWITCH_TO_KERNEL_STACK
	ENCODE_FRAME_POINTER
	cld
	movl	$(__KERNEL_PERCPU), %ecx
	movl	%ecx, %fs
	UNWIND_ESPFIX_STACK
	GS_TO_REG %ecx
	movl	PT_GS(%esp), %edi		# get the function address
@@ -937,9 +1374,6 @@ common_exception:
	movl	$-1, PT_ORIG_EAX(%esp)		# no syscall to restart
	REG_TO_PTGS %ecx
	SET_KERNEL_GS %ecx
	movl	$(__USER_DS), %ecx
	movl	%ecx, %ds
	movl	%ecx, %es
	TRACE_IRQS_OFF
	movl	%esp, %eax			# pt_regs pointer
	CALL_NOSPEC %edi
@@ -948,40 +1382,12 @@ END(common_exception)

ENTRY(debug)
	/*
	 * #DB can happen at the first instruction of
	 * entry_SYSENTER_32 or in Xen's SYSENTER prologue.  If this
	 * happens, then we will be running on a very small stack.  We
	 * need to detect this condition and switch to the thread
	 * stack before calling any C code at all.
	 *
	 * If you edit this code, keep in mind that NMIs can happen in here.
	 * Entry from sysenter is now handled in common_exception
	 */
	ASM_CLAC
	pushl	$-1				# mark this as an int
	SAVE_ALL
	ENCODE_FRAME_POINTER
	xorl	%edx, %edx			# error code 0
	movl	%esp, %eax			# pt_regs pointer

	/* Are we currently on the SYSENTER stack? */
	movl	PER_CPU_VAR(cpu_entry_area), %ecx
	addl	$CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
	subl	%eax, %ecx	/* ecx = (end of entry_stack) - esp */
	cmpl	$SIZEOF_entry_stack, %ecx
	jb	.Ldebug_from_sysenter_stack

	TRACE_IRQS_OFF
	call	do_debug
	jmp	ret_from_exception

.Ldebug_from_sysenter_stack:
	/* We're on the SYSENTER stack.  Switch off. */
	movl	%esp, %ebx
	movl	PER_CPU_VAR(cpu_current_top_of_stack), %esp
	TRACE_IRQS_OFF
	call	do_debug
	movl	%ebx, %esp
	jmp	ret_from_exception
	pushl	$do_debug
	jmp	common_exception
END(debug)

/*
@@ -993,6 +1399,7 @@ END(debug)
 */
ENTRY(nmi)
	ASM_CLAC

#ifdef CONFIG_X86_ESPFIX32
	pushl	%eax
	movl	%ss, %eax
@@ -1002,7 +1409,7 @@ ENTRY(nmi)
#endif

	pushl	%eax				# pt_regs->orig_ax
	SAVE_ALL
	SAVE_ALL_NMI cr3_reg=%edi
	ENCODE_FRAME_POINTER
	xorl	%edx, %edx			# zero error code
	movl	%esp, %eax			# pt_regs pointer
@@ -1016,7 +1423,7 @@ ENTRY(nmi)

	/* Not on SYSENTER stack. */
	call	do_nmi
	jmp	.Lrestore_all_notrace
	jmp	.Lnmi_return

.Lnmi_from_sysenter_stack:
	/*
@@ -1027,7 +1434,11 @@ ENTRY(nmi)
	movl	PER_CPU_VAR(cpu_current_top_of_stack), %esp
	call	do_nmi
	movl	%ebx, %esp
	jmp	.Lrestore_all_notrace

.Lnmi_return:
	CHECK_AND_APPLY_ESPFIX
	RESTORE_ALL_NMI cr3_reg=%edi pop=4
	jmp	.Lirq_return

#ifdef CONFIG_X86_ESPFIX32
.Lnmi_espfix_stack:
@@ -1042,12 +1453,12 @@ ENTRY(nmi)
	pushl	16(%esp)
	.endr
	pushl	%eax
	SAVE_ALL
	SAVE_ALL_NMI cr3_reg=%edi
	ENCODE_FRAME_POINTER
	FIXUP_ESPFIX_STACK			# %eax == %esp
	xorl	%edx, %edx			# zero error code
	call	do_nmi
	RESTORE_REGS
	RESTORE_ALL_NMI cr3_reg=%edi
	lss	12+4(%esp), %esp		# back to espfix stack
	jmp	.Lirq_return
#endif
@@ -1056,7 +1467,8 @@ END(nmi)
ENTRY(int3)
	ASM_CLAC
	pushl	$-1				# mark this as an int
	SAVE_ALL

	SAVE_ALL switch_stacks=1
	ENCODE_FRAME_POINTER
	TRACE_IRQS_OFF
	xorl	%edx, %edx			# zero error code
+1 −0
Original line number Diff line number Diff line
@@ -219,6 +219,7 @@
#define X86_FEATURE_IBPB		( 7*32+26) /* Indirect Branch Prediction Barrier */
#define X86_FEATURE_STIBP		( 7*32+27) /* Single Thread Indirect Branch Predictors */
#define X86_FEATURE_ZEN			( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */
#define X86_FEATURE_IBRS_ENHANCED	( 7*32+29) /* Enhanced IBRS */

/* Virtualization flags: Linux defined, word 8 */
#define X86_FEATURE_TPR_SHADOW		( 8*32+ 0) /* Intel TPR Shadow */
+0 −5
Original line number Diff line number Diff line
@@ -71,12 +71,7 @@ struct ldt_struct {

static inline void *ldt_slot_va(int slot)
{
#ifdef CONFIG_X86_64
	return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot);
#else
	BUG();
	return (void *)fix_to_virt(FIX_HOLE);
#endif
}

/*
+1 −1
Original line number Diff line number Diff line
@@ -214,7 +214,7 @@ enum spectre_v2_mitigation {
	SPECTRE_V2_RETPOLINE_MINIMAL_AMD,
	SPECTRE_V2_RETPOLINE_GENERIC,
	SPECTRE_V2_RETPOLINE_AMD,
	SPECTRE_V2_IBRS,
	SPECTRE_V2_IBRS_ENHANCED,
};

/* The Speculative Store Bypass disable variants */
+9 −0
Original line number Diff line number Diff line
@@ -19,6 +19,9 @@ static inline void native_set_pte(pte_t *ptep , pte_t pte)

static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd)
{
#ifdef CONFIG_PAGE_TABLE_ISOLATION
	pmd.pud.p4d.pgd = pti_set_user_pgtbl(&pmdp->pud.p4d.pgd, pmd.pud.p4d.pgd);
#endif
	*pmdp = pmd;
}

@@ -58,6 +61,9 @@ static inline pte_t native_ptep_get_and_clear(pte_t *xp)
#ifdef CONFIG_SMP
static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp)
{
#ifdef CONFIG_PAGE_TABLE_ISOLATION
	pti_set_user_pgtbl(&xp->pud.p4d.pgd, __pgd(0));
#endif
	return __pmd(xchg((pmdval_t *)xp, 0));
}
#else
@@ -67,6 +73,9 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp)
#ifdef CONFIG_SMP
static inline pud_t native_pudp_get_and_clear(pud_t *xp)
{
#ifdef CONFIG_PAGE_TABLE_ISOLATION
	pti_set_user_pgtbl(&xp->p4d.pgd, __pgd(0));
#endif
	return __pud(xchg((pudval_t *)xp, 0));
}
#else
Loading