Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 00c89b2f authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'x86-traps' (trap handling from Andy Lutomirski)

Merge x86-64 iret fixes from Andy Lutomirski:
 "This addresses the following issues:

   - an unrecoverable double-fault triggerable with modify_ldt.
   - invalid stack usage in espfix64 failed IRET recovery from IST
     context.
   - invalid stack usage in non-espfix64 failed IRET recovery from IST
     context.

  It also makes a good but IMO scary change: non-espfix64 failed IRET
  will now report the correct error.  Hopefully nothing depended on the
  old incorrect behavior, but maybe Wine will get confused in some
  obscure corner case"

* emailed patches from Andy Lutomirski <luto@amacapital.net>:
  x86_64, traps: Rework bad_iret
  x86_64, traps: Stop using IST for #SS
  x86_64, traps: Fix the espfix64 #DF fixup and rewrite it in C
parents 27946315 b645af2d
Loading
Loading
Loading
Loading
+0 −1
Original line number Diff line number Diff line
@@ -20,7 +20,6 @@
#define THREAD_SIZE_ORDER	1
#define THREAD_SIZE		(PAGE_SIZE << THREAD_SIZE_ORDER)

#define STACKFAULT_STACK 0
#define DOUBLEFAULT_STACK 1
#define NMI_STACK 0
#define DEBUG_STACK 0
+5 −6
Original line number Diff line number Diff line
@@ -14,12 +14,11 @@
#define IRQ_STACK_ORDER 2
#define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER)

#define STACKFAULT_STACK 1
#define DOUBLEFAULT_STACK 2
#define NMI_STACK 3
#define DEBUG_STACK 4
#define MCE_STACK 5
#define N_EXCEPTION_STACKS 5  /* hw limit: 7 */
#define DOUBLEFAULT_STACK 1
#define NMI_STACK 2
#define DEBUG_STACK 3
#define MCE_STACK 4
#define N_EXCEPTION_STACKS 4  /* hw limit: 7 */

#define PUD_PAGE_SIZE		(_AC(1, UL) << PUD_SHIFT)
#define PUD_PAGE_MASK		(~(PUD_PAGE_SIZE-1))
+1 −0
Original line number Diff line number Diff line
@@ -39,6 +39,7 @@ asmlinkage void simd_coprocessor_error(void);

#ifdef CONFIG_TRACING
asmlinkage void trace_page_fault(void);
#define trace_stack_segment stack_segment
#define trace_divide_error divide_error
#define trace_bounds bounds
#define trace_invalid_op invalid_op
+0 −1
Original line number Diff line number Diff line
@@ -24,7 +24,6 @@ static char x86_stack_ids[][8] = {
		[ DEBUG_STACK-1			]	= "#DB",
		[ NMI_STACK-1			]	= "NMI",
		[ DOUBLEFAULT_STACK-1		]	= "#DF",
		[ STACKFAULT_STACK-1		]	= "#SS",
		[ MCE_STACK-1			]	= "#MC",
#if DEBUG_STKSZ > EXCEPTION_STKSZ
		[ N_EXCEPTION_STACKS ...
+22 −59
Original line number Diff line number Diff line
@@ -828,9 +828,15 @@ ENTRY(native_iret)
	jnz native_irq_return_ldt
#endif

.global native_irq_return_iret
native_irq_return_iret:
	/*
	 * This may fault.  Non-paranoid faults on return to userspace are
	 * handled by fixup_bad_iret.  These include #SS, #GP, and #NP.
	 * Double-faults due to espfix64 are handled in do_double_fault.
	 * Other faults here are fatal.
	 */
	iretq
	_ASM_EXTABLE(native_irq_return_iret, bad_iret)

#ifdef CONFIG_X86_ESPFIX64
native_irq_return_ldt:
@@ -858,25 +864,6 @@ native_irq_return_ldt:
	jmp native_irq_return_iret
#endif

	.section .fixup,"ax"
bad_iret:
	/*
	 * The iret traps when the %cs or %ss being restored is bogus.
	 * We've lost the original trap vector and error code.
	 * #GPF is the most likely one to get for an invalid selector.
	 * So pretend we completed the iret and took the #GPF in user mode.
	 *
	 * We are now running with the kernel GS after exception recovery.
	 * But error_entry expects us to have user GS to match the user %cs,
	 * so swap back.
	 */
	pushq $0

	SWAPGS
	jmp general_protection

	.previous

	/* edi: workmask, edx: work */
retint_careful:
	CFI_RESTORE_STATE
@@ -922,37 +909,6 @@ ENTRY(retint_kernel)
	CFI_ENDPROC
END(common_interrupt)

	/*
	 * If IRET takes a fault on the espfix stack, then we
	 * end up promoting it to a doublefault.  In that case,
	 * modify the stack to make it look like we just entered
	 * the #GP handler from user space, similar to bad_iret.
	 */
#ifdef CONFIG_X86_ESPFIX64
	ALIGN
__do_double_fault:
	XCPT_FRAME 1 RDI+8
	movq RSP(%rdi),%rax		/* Trap on the espfix stack? */
	sarq $PGDIR_SHIFT,%rax
	cmpl $ESPFIX_PGD_ENTRY,%eax
	jne do_double_fault		/* No, just deliver the fault */
	cmpl $__KERNEL_CS,CS(%rdi)
	jne do_double_fault
	movq RIP(%rdi),%rax
	cmpq $native_irq_return_iret,%rax
	jne do_double_fault		/* This shouldn't happen... */
	movq PER_CPU_VAR(kernel_stack),%rax
	subq $(6*8-KERNEL_STACK_OFFSET),%rax	/* Reset to original stack */
	movq %rax,RSP(%rdi)
	movq $0,(%rax)			/* Missing (lost) #GP error code */
	movq $general_protection,RIP(%rdi)
	retq
	CFI_ENDPROC
END(__do_double_fault)
#else
# define __do_double_fault do_double_fault
#endif

/*
 * APIC interrupts.
 */
@@ -1124,7 +1080,7 @@ idtentry overflow do_overflow has_error_code=0
idtentry bounds do_bounds has_error_code=0
idtentry invalid_op do_invalid_op has_error_code=0
idtentry device_not_available do_device_not_available has_error_code=0
idtentry double_fault __do_double_fault has_error_code=1 paranoid=1
idtentry double_fault do_double_fault has_error_code=1 paranoid=1
idtentry coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0
idtentry invalid_TSS do_invalid_TSS has_error_code=1
idtentry segment_not_present do_segment_not_present has_error_code=1
@@ -1289,7 +1245,7 @@ apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \

idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
idtentry stack_segment do_stack_segment has_error_code=1 paranoid=1
idtentry stack_segment do_stack_segment has_error_code=1
#ifdef CONFIG_XEN
idtentry xen_debug do_debug has_error_code=0
idtentry xen_int3 do_int3 has_error_code=0
@@ -1399,17 +1355,16 @@ error_sti:

/*
 * There are two places in the kernel that can potentially fault with
 * usergs. Handle them here. The exception handlers after iret run with
 * kernel gs again, so don't set the user space flag. B stepping K8s
 * sometimes report an truncated RIP for IRET exceptions returning to
 * compat mode. Check for these here too.
 * usergs. Handle them here.  B stepping K8s sometimes report a
 * truncated RIP for IRET exceptions returning to compat mode. Check
 * for these here too.
 */
error_kernelspace:
	CFI_REL_OFFSET rcx, RCX+8
	incl %ebx
	leaq native_irq_return_iret(%rip),%rcx
	cmpq %rcx,RIP+8(%rsp)
	je error_swapgs
	je error_bad_iret
	movl %ecx,%eax	/* zero extend */
	cmpq %rax,RIP+8(%rsp)
	je bstep_iret
@@ -1420,7 +1375,15 @@ error_kernelspace:
bstep_iret:
	/* Fix truncated RIP */
	movq %rcx,RIP+8(%rsp)
	jmp error_swapgs
	/* fall through */

error_bad_iret:
	SWAPGS
	mov %rsp,%rdi
	call fixup_bad_iret
	mov %rax,%rsp
	decl %ebx	/* Return to usergs */
	jmp error_sti
	CFI_ENDPROC
END(error_entry)

Loading