Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit b02fcf9b authored by Josh Poimboeuf, committed by Ingo Molnar
Browse files

x86/unwinder: Handle stack overflows more gracefully

There are at least two unwinder bugs hindering the debugging of
stack-overflow crashes:

- It doesn't deal gracefully with the case where the stack overflows and
  the stack pointer itself isn't on a valid stack but the
  to-be-dereferenced data *is*.

- The ORC oops dump code doesn't know how to print partial pt_regs, for the
  case where we get an interrupt/exception in *early* entry code
  before the full pt_regs have been saved.

Fix both issues.

http://lkml.kernel.org/r/20171126024031.uxi4numpbjm5rlbr@treble



Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Borislav Petkov <bpetkov@suse.de>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David Laight <David.Laight@aculab.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Eduardo Valentin <eduval@amazon.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: aliguori@amazon.com
Cc: daniel.gruss@iaik.tugraz.at
Cc: hughd@google.com
Cc: keescook@google.com
Link: https://lkml.kernel.org/r/20171204150605.071425003@linutronix.de


Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent d3a09104
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -26,6 +26,7 @@ extern void die(const char *, struct pt_regs *,long);
extern int __must_check __die(const char *, struct pt_regs *, long);
extern void show_stack_regs(struct pt_regs *regs);
extern void __show_regs(struct pt_regs *regs, int all);
extern void show_iret_regs(struct pt_regs *regs);
extern unsigned long oops_begin(void);
extern void oops_end(unsigned long, struct pt_regs *, int signr);

+7 −0
Original line number Diff line number Diff line
@@ -7,6 +7,9 @@
#include <asm/ptrace.h>
#include <asm/stacktrace.h>

/*
 * The iret frame is the tail of struct pt_regs: it starts at the 'ip' field
 * (IRET_FRAME_OFFSET bytes into the structure) and runs to the end of the
 * structure (IRET_FRAME_SIZE bytes).
 */
#define IRET_FRAME_OFFSET (offsetof(struct pt_regs, ip))
#define IRET_FRAME_SIZE   (sizeof(struct pt_regs) - IRET_FRAME_OFFSET)

struct unwind_state {
	struct stack_info stack_info;
	unsigned long stack_mask;
@@ -52,6 +55,10 @@ void unwind_start(struct unwind_state *state, struct task_struct *task,
}

#if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER)
/*
 * WARNING: The entire pt_regs may not be safe to dereference.  In some cases,
 * only the iret frame registers are accessible.  Use with caution!
 */
static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
{
	if (unwind_done(state))
+27 −5
Original line number Diff line number Diff line
@@ -50,6 +50,28 @@ static void printk_stack_address(unsigned long address, int reliable,
	printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address);
}

void show_iret_regs(struct pt_regs *regs)
{
	printk(KERN_DEFAULT "RIP: %04x:%pS\n", (int)regs->cs, (void *)regs->ip);
	printk(KERN_DEFAULT "RSP: %04x:%016lx EFLAGS: %08lx", (int)regs->ss,
		regs->sp, regs->flags);
}

/*
 * Print as much of @regs as is known to lie on the stack described by @info:
 *
 *  - the whole pt_regs, via __show_regs(regs, 0), if all of it is on the stack;
 *  - otherwise only the iret frame, if at least that part is on the stack;
 *  - otherwise nothing.
 */
static void show_regs_safe(struct stack_info *info, struct pt_regs *regs)
{
	void *iret_frame = (void *)regs + IRET_FRAME_OFFSET;

	if (on_stack(info, regs, sizeof(*regs))) {
		__show_regs(regs, 0);
		return;
	}

	/*
	 * When an interrupt or exception occurs in entry code, the full
	 * pt_regs might not have been saved yet.  In that case just print
	 * the iret frame.
	 */
	if (on_stack(info, iret_frame, IRET_FRAME_SIZE))
		show_iret_regs(regs);
}

void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
			unsigned long *stack, char *log_lvl)
{
@@ -94,8 +116,8 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
		if (stack_name)
			printk("%s <%s>\n", log_lvl, stack_name);

		if (regs && on_stack(&stack_info, regs, sizeof(*regs)))
			__show_regs(regs, 0);
		if (regs)
			show_regs_safe(&stack_info, regs);

		/*
		 * Scan the stack, printing any text addresses we find.  At the
@@ -119,7 +141,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,

			/*
			 * Don't print regs->ip again if it was already printed
			 * by __show_regs() below.
			 * by show_regs_safe() below.
			 */
			if (regs && stack == &regs->ip)
				goto next;
@@ -155,8 +177,8 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,

			/* if the frame has entry regs, print them */
			regs = unwind_get_entry_regs(&state);
			if (regs && on_stack(&stack_info, regs, sizeof(*regs)))
				__show_regs(regs, 0);
			if (regs)
				show_regs_safe(&stack_info, regs);
		}

		if (stack_name)
+5 −6
Original line number Diff line number Diff line
@@ -69,9 +69,8 @@ void __show_regs(struct pt_regs *regs, int all)
	unsigned int fsindex, gsindex;
	unsigned int ds, cs, es;

	printk(KERN_DEFAULT "RIP: %04lx:%pS\n", regs->cs, (void *)regs->ip);
	printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx", regs->ss,
		regs->sp, regs->flags);
	show_iret_regs(regs);

	if (regs->orig_ax != -1)
		pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax);
	else
@@ -88,6 +87,9 @@ void __show_regs(struct pt_regs *regs, int all)
	printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n",
	       regs->r13, regs->r14, regs->r15);

	if (!all)
		return;

	asm("movl %%ds,%0" : "=r" (ds));
	asm("movl %%cs,%0" : "=r" (cs));
	asm("movl %%es,%0" : "=r" (es));
@@ -98,9 +100,6 @@ void __show_regs(struct pt_regs *regs, int all)
	rdmsrl(MSR_GS_BASE, gs);
	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

	if (!all)
		return;

	cr0 = read_cr0();
	cr2 = read_cr2();
	cr3 = __read_cr3();
+25 −49
Original line number Diff line number Diff line
@@ -253,21 +253,14 @@ unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
	return NULL;
}

static bool stack_access_ok(struct unwind_state *state, unsigned long addr,
static bool stack_access_ok(struct unwind_state *state, unsigned long _addr,
			    size_t len)
{
	struct stack_info *info = &state->stack_info;
	void *addr = (void *)_addr;

	/*
	 * If the address isn't on the current stack, switch to the next one.
	 *
	 * We may have to traverse multiple stacks to deal with the possibility
	 * that info->next_sp could point to an empty stack and the address
	 * could be on a subsequent stack.
	 */
	while (!on_stack(info, (void *)addr, len))
		if (get_stack_info(info->next_sp, state->task, info,
				   &state->stack_mask))
	if (!on_stack(info, addr, len) &&
	    (get_stack_info(addr, state->task, info, &state->stack_mask)))
		return false;

	return true;
@@ -283,42 +276,32 @@ static bool deref_stack_reg(struct unwind_state *state, unsigned long addr,
	return true;
}

#define REGS_SIZE (sizeof(struct pt_regs))
#define SP_OFFSET (offsetof(struct pt_regs, sp))
#define IRET_REGS_SIZE (REGS_SIZE - offsetof(struct pt_regs, ip))
#define IRET_SP_OFFSET (SP_OFFSET - offsetof(struct pt_regs, ip))

static bool deref_stack_regs(struct unwind_state *state, unsigned long addr,
			     unsigned long *ip, unsigned long *sp, bool full)
			     unsigned long *ip, unsigned long *sp)
{
	size_t regs_size = full ? REGS_SIZE : IRET_REGS_SIZE;
	size_t sp_offset = full ? SP_OFFSET : IRET_SP_OFFSET;
	struct pt_regs *regs = (struct pt_regs *)(addr + regs_size - REGS_SIZE);
	struct pt_regs *regs = (struct pt_regs *)addr;

	if (IS_ENABLED(CONFIG_X86_64)) {
		if (!stack_access_ok(state, addr, regs_size))
	/* x86-32 support will be more complicated due to the &regs->sp hack */
	BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_32));

	if (!stack_access_ok(state, addr, sizeof(struct pt_regs)))
		return false;

	*ip = regs->ip;
	*sp = regs->sp;

	return true;
}

	if (!stack_access_ok(state, addr, sp_offset))
		return false;

	*ip = regs->ip;
static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr,
				  unsigned long *ip, unsigned long *sp)
{
	struct pt_regs *regs = (void *)addr - IRET_FRAME_OFFSET;

	if (user_mode(regs)) {
		if (!stack_access_ok(state, addr + sp_offset,
				     REGS_SIZE - SP_OFFSET))
	if (!stack_access_ok(state, addr, IRET_FRAME_SIZE))
		return false;

	*ip = regs->ip;
	*sp = regs->sp;
	} else
		*sp = (unsigned long)&regs->sp;

	return true;
}

@@ -327,7 +310,6 @@ bool unwind_next_frame(struct unwind_state *state)
	unsigned long ip_p, sp, orig_ip, prev_sp = state->sp;
	enum stack_type prev_type = state->stack_info.type;
	struct orc_entry *orc;
	struct pt_regs *ptregs;
	bool indirect = false;

	if (unwind_done(state))
@@ -435,7 +417,7 @@ bool unwind_next_frame(struct unwind_state *state)
		break;

	case ORC_TYPE_REGS:
		if (!deref_stack_regs(state, sp, &state->ip, &state->sp, true)) {
		if (!deref_stack_regs(state, sp, &state->ip, &state->sp)) {
			orc_warn("can't dereference registers at %p for ip %pB\n",
				 (void *)sp, (void *)orig_ip);
			goto done;
@@ -447,20 +429,14 @@ bool unwind_next_frame(struct unwind_state *state)
		break;

	case ORC_TYPE_REGS_IRET:
		if (!deref_stack_regs(state, sp, &state->ip, &state->sp, false)) {
		if (!deref_stack_iret_regs(state, sp, &state->ip, &state->sp)) {
			orc_warn("can't dereference iret registers at %p for ip %pB\n",
				 (void *)sp, (void *)orig_ip);
			goto done;
		}

		ptregs = container_of((void *)sp, struct pt_regs, ip);
		if ((unsigned long)ptregs >= prev_sp &&
		    on_stack(&state->stack_info, ptregs, REGS_SIZE)) {
			state->regs = ptregs;
		state->regs = (void *)sp - IRET_FRAME_OFFSET;
		state->full_regs = false;
		} else
			state->regs = NULL;

		state->signal = true;
		break;