
Commit 3c88c692 authored by Peter Zijlstra, committed by Ingo Molnar

x86/stackframe/32: Provide consistent pt_regs



Currently pt_regs on x86_32 has an oddity in that kernel regs
(!user_mode(regs)) are short two entries (esp/ss). This means that any
code trying to use them (typically: regs->sp) needs to jump through
some unfortunate hoops.
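
For kernel-mode regs the hardware frame simply stops before the sp/ss
slots, so the exception-time stack pointer is the address where the
missing slot would have been. A minimal sketch of the resulting hoop
(illustrative only; the real 32-bit kernel_stack_pointer() helper also
had to handle regs living off the task stack):

	/* sketch, not the in-tree helper */
	static unsigned long sketch_kernel_sp(struct pt_regs *regs)
	{
		if (user_mode(regs))
			return regs->sp;	/* full frame: sp was pushed */
		/* truncated frame: %esp at exception time was right
		 * here, where the sp slot would have been */
		return (unsigned long)&regs->sp;
	}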

Change the entry code to fix this up and create a full pt_regs frame.

This then simplifies various trampolines in ftrace and kprobes, the
stack unwinder, ptrace, kdump and kgdb.
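
For example (see the ptrace.h and stacktrace.h hunks below), callers
that want the kernel stack pointer no longer need a 32-bit special
case:

	/* before: a helper papered over the missing entry */
	unsigned long sp = kernel_stack_pointer(regs);

	/* after: regs->sp is valid in every pt_regs */
	unsigned long sp = regs->sp;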

Many thanks to Josh for help with the cleanups!

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent ea1ed38d
arch/x86/entry/entry_32.S  +95 −10
@@ -202,9 +202,102 @@
 .Lend_\@:
 .endm
 
+#define CS_FROM_ENTRY_STACK	(1 << 31)
+#define CS_FROM_USER_CR3	(1 << 30)
+#define CS_FROM_KERNEL		(1 << 29)
+
+.macro FIXUP_FRAME
+	/*
+	 * The high bits of the CS dword (__csh) are used for CS_FROM_*.
+	 * Clear them in case hardware didn't do this for us.
+	 */
+	andl	$0x0000ffff, 3*4(%esp)
+
+#ifdef CONFIG_VM86
+	testl	$X86_EFLAGS_VM, 4*4(%esp)
+	jnz	.Lfrom_usermode_no_fixup_\@
+#endif
+	testl	$SEGMENT_RPL_MASK, 3*4(%esp)
+	jnz	.Lfrom_usermode_no_fixup_\@
+
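+	/* Tag the frame so IRET_FRAME below knows to undo this fixup. */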
+	orl	$CS_FROM_KERNEL, 3*4(%esp)
+
+	/*
+	 * When we're here from kernel mode, the (exception) stack looks like:
+	 *
+	 *  5*4(%esp) - <previous context>
+	 *  4*4(%esp) - flags
+	 *  3*4(%esp) - cs
+	 *  2*4(%esp) - ip
+	 *  1*4(%esp) - orig_eax
+	 *  0*4(%esp) - gs / function
+	 *
+	 * Let's build a 5 entry IRET frame after that, such that struct pt_regs
+	 * is complete and in particular regs->sp is correct. This gives us
+	 * the original 5 entries as gap:
+	 *
+	 * 12*4(%esp) - <previous context>
+	 * 11*4(%esp) - gap / flags
+	 * 10*4(%esp) - gap / cs
+	 *  9*4(%esp) - gap / ip
+	 *  8*4(%esp) - gap / orig_eax
+	 *  7*4(%esp) - gap / gs / function
+	 *  6*4(%esp) - ss
+	 *  5*4(%esp) - sp
+	 *  4*4(%esp) - flags
+	 *  3*4(%esp) - cs
+	 *  2*4(%esp) - ip
+	 *  1*4(%esp) - orig_eax
+	 *  0*4(%esp) - gs / function
+	 */
+
+	pushl	%ss		# ss
+	pushl	%esp		# sp (points at ss)
+	addl	$6*4, (%esp)	# point sp back at the previous context
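+	/*
+	 * Each push below lowers %esp by 4, so the constant 6*4(%esp)
+	 * offset picks up the next original slot every time.
+	 */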
+	pushl	6*4(%esp)	# flags
+	pushl	6*4(%esp)	# cs
+	pushl	6*4(%esp)	# ip
+	pushl	6*4(%esp)	# orig_eax
+	pushl	6*4(%esp)	# gs / function
+.Lfrom_usermode_no_fixup_\@:
+.endm
+
+.macro IRET_FRAME
+	testl $CS_FROM_KERNEL, 1*4(%esp)
+	jz .Lfinished_frame_\@
+
+	/*
+	 * Reconstruct the 3 entry IRET frame right after the (modified)
+	 * regs->sp without lowering %esp in between, such that an NMI in the
+	 * middle doesn't scribble our stack.
+	 */
+	pushl	%eax
+	pushl	%ecx
+	movl	5*4(%esp), %eax		# (modified) regs->sp
+
+	movl	4*4(%esp), %ecx		# flags
+	movl	%ecx, -4(%eax)
+
+	movl	3*4(%esp), %ecx		# cs
+	andl	$0x0000ffff, %ecx
+	movl	%ecx, -8(%eax)
+
+	movl	2*4(%esp), %ecx		# ip
+	movl	%ecx, -12(%eax)
+
+	movl	1*4(%esp), %ecx		# eax
+	movl	%ecx, -16(%eax)
+
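+	/*
+	 * %ecx is restored while %esp still covers the scratch pushes;
+	 * the lea then points %esp at the %eax copy written above, and
+	 * the final popl leaves ip/cs/flags in place for IRET.
+	 */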
+	popl	%ecx
+	lea	-16(%eax), %esp
+	popl	%eax
+.Lfinished_frame_\@:
+.endm
+
 .macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0
 	cld
 	PUSH_GS
+	FIXUP_FRAME
 	pushl	%fs
 	pushl	%es
 	pushl	%ds
@@ -358,9 +451,6 @@
  * switch to it before we do any copying.
  */
 
-#define CS_FROM_ENTRY_STACK	(1 << 31)
-#define CS_FROM_USER_CR3	(1 << 30)
-
 .macro SWITCH_TO_KERNEL_STACK
 
 	ALTERNATIVE     "", "jmp .Lend_\@", X86_FEATURE_XENPV
@@ -374,13 +464,6 @@
 	 * that register for the time this macro runs
 	 */
 
-	/*
-	 * The high bits of the CS dword (__csh) are used for
-	 * CS_FROM_ENTRY_STACK and CS_FROM_USER_CR3. Clear them in case
-	 * hardware didn't do this for us.
-	 */
-	andl	$(0x0000ffff), PT_CS(%esp)
-
 	/* Are we on the entry stack? Bail out if not! */
 	movl	PER_CPU_VAR(cpu_entry_area), %ecx
 	addl	$CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
@@ -990,6 +1073,7 @@ restore_all:
 	/* Restore user state */
 	RESTORE_REGS pop=4			# skip orig_eax/error_code
 .Lirq_return:
+	IRET_FRAME
 	/*
 	 * ARCH_HAS_MEMBARRIER_SYNC_CORE relies on IRET core serialization
 	 * when returning from IPI handler and when returning from
@@ -1340,6 +1424,7 @@ END(page_fault)
 
 common_exception:
 	/* the function address is in %gs's slot on the stack */
+	FIXUP_FRAME
 	pushl	%fs
 	pushl	%es
 	pushl	%ds
arch/x86/include/asm/kexec.h  +0 −17
@@ -70,22 +70,6 @@ struct kimage;
 #define KEXEC_BACKUP_SRC_START	(0UL)
 #define KEXEC_BACKUP_SRC_END	(640 * 1024UL - 1)	/* 640K */
 
-/*
- * CPU does not save ss and sp on stack if execution is already
- * running in kernel mode at the time of NMI occurrence. This code
- * fixes it.
- */
-static inline void crash_fixup_ss_esp(struct pt_regs *newregs,
-				      struct pt_regs *oldregs)
-{
-#ifdef CONFIG_X86_32
-	newregs->sp = (unsigned long)&(oldregs->sp);
-	asm volatile("xorl %%eax, %%eax\n\t"
-		     "movw %%ss, %%ax\n\t"
-		     :"=a"(newregs->ss));
-#endif
-}
-
 /*
  * This function is responsible for capturing register states if coming
  * via panic otherwise just fix up the ss and sp if coming via kernel
@@ -96,7 +80,6 @@ static inline void crash_setup_regs(struct pt_regs *newregs,
 {
 	if (oldregs) {
 		memcpy(newregs, oldregs, sizeof(*newregs));
-		crash_fixup_ss_esp(newregs, oldregs);
 	} else {
 #ifdef CONFIG_X86_32
 		asm volatile("movl %%ebx,%0" : "=m"(newregs->bx));
arch/x86/include/asm/ptrace.h  +2 −15
@@ -166,14 +166,10 @@ static inline bool user_64bit_mode(struct pt_regs *regs)
 #define compat_user_stack_pointer()	current_pt_regs()->sp
 #endif
 
-#ifdef CONFIG_X86_32
-extern unsigned long kernel_stack_pointer(struct pt_regs *regs);
-#else
 static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
 {
 	return regs->sp;
 }
-#endif
 
 #define GET_IP(regs) ((regs)->ip)
 #define GET_FP(regs) ((regs)->bp)
@@ -201,14 +197,6 @@ static inline unsigned long regs_get_register(struct pt_regs *regs,
 	if (unlikely(offset > MAX_REG_OFFSET))
 		return 0;
 #ifdef CONFIG_X86_32
-	/*
-	 * Traps from the kernel do not save sp and ss.
-	 * Use the helper function to retrieve sp.
-	 */
-	if (offset == offsetof(struct pt_regs, sp) &&
-	    regs->cs == __KERNEL_CS)
-		return kernel_stack_pointer(regs);
-
 	/* The selector fields are 16-bit. */
 	if (offset == offsetof(struct pt_regs, cs) ||
 	    offset == offsetof(struct pt_regs, ss) ||
@@ -234,8 +222,7 @@ static inline unsigned long regs_get_register(struct pt_regs *regs,
 static inline int regs_within_kernel_stack(struct pt_regs *regs,
 					   unsigned long addr)
 {
-	return ((addr & ~(THREAD_SIZE - 1))  ==
-		(kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1)));
+	return ((addr & ~(THREAD_SIZE - 1)) == (regs->sp & ~(THREAD_SIZE - 1)));
 }
 
 /**
@@ -249,7 +236,7 @@ static inline int regs_within_kernel_stack(struct pt_regs *regs,
  */
 static inline unsigned long *regs_get_kernel_stack_nth_addr(struct pt_regs *regs, unsigned int n)
 {
-	unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs);
+	unsigned long *addr = (unsigned long *)regs->sp;
 
 	addr += n;
 	if (regs_within_kernel_stack(regs, (unsigned long)addr))
arch/x86/include/asm/stacktrace.h  +1 −1
@@ -78,7 +78,7 @@ static inline unsigned long *
 get_stack_pointer(struct task_struct *task, struct pt_regs *regs)
 {
 	if (regs)
-		return (unsigned long *)kernel_stack_pointer(regs);
+		return (unsigned long *)regs->sp;
 
 	if (task == current)
 		return __builtin_frame_address(0);
arch/x86/kernel/crash.c  +0 −8
@@ -73,14 +73,6 @@ static inline void cpu_crash_vmclear_loaded_vmcss(void)
 
 static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
 {
-#ifdef CONFIG_X86_32
-	struct pt_regs fixed_regs;
-
-	if (!user_mode(regs)) {
-		crash_fixup_ss_esp(&fixed_regs, regs);
-		regs = &fixed_regs;
-	}
-#endif
 	crash_save_cpu(regs, cpu);
 
 	/*