Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 83c2f912 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (39 commits)
  perf tools: Fix compile error on x86_64 Ubuntu
  perf report: Fix --stdio output alignment when --showcpuutilization used
  perf annotate: Get rid of field_sep check
  perf annotate: Fix usage string
  perf kmem: Fix a memory leak
  perf kmem: Add missing closedir() calls
  perf top: Add error message for EMFILE
  perf test: Change type of '-v' option to INCR
  perf script: Add missing closedir() calls
  tracing: Fix compile error when static ftrace is enabled
  recordmcount: Fix handling of elf64 big-endian objects.
  perf tools: Add const.h to MANIFEST to make perf-tar-src-pkg work again
  perf tools: Add support for guest/host-only profiling
  perf kvm: Do guest-only counting by default
  perf top: Don't update total_period on process_sample
  perf hists: Stop using 'self' for struct hist_entry
  perf hists: Rename total_session to total_period
  x86: Add counter when debug stack is used with interrupts enabled
  x86: Allow NMIs to hit breakpoints in i386
  x86: Keep current stack in NMI breakpoints
  ...
parents f0ed5b9a 172d1b0b
Loading
Loading
Loading
Loading
+8 −0
Original line number Diff line number Diff line
@@ -2475,6 +2475,14 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
	stacktrace	[FTRACE]
			Enable the stack tracer on boot up.

	stacktrace_filter=[function-list]
			[FTRACE] Limit the functions that the stack tracer
			will trace at boot up. function-list is a comma separated
			list of functions. This list can be changed at run
			time by the stack_trace_filter file in the debugfs
			tracing directory. Note: this also enables stack
			tracing, so the stacktrace parameter above is not needed.

	sti=		[PARISC,HW]
			Format: <num>
			Set the STI (builtin display/keyboard on the HP-PARISC
+22 −0
Original line number Diff line number Diff line
@@ -101,6 +101,28 @@ extern void aout_dump_debugregs(struct user *dump);

extern void hw_breakpoint_restore(void);

#ifdef CONFIG_X86_64
/* Per-CPU counter adjusted by the two helpers below. */
DECLARE_PER_CPU(int, debug_stack_usage);

/* Bump the current CPU's debug_stack_usage counter by one. */
static inline void debug_stack_usage_inc(void)
{
	__get_cpu_var(debug_stack_usage) += 1;
}

/* Drop the current CPU's debug_stack_usage counter by one. */
static inline void debug_stack_usage_dec(void)
{
	__get_cpu_var(debug_stack_usage) -= 1;
}

/* Only declared here; the 64-bit definitions live out of line. */
int is_debug_stack(unsigned long addr);
void debug_stack_set_zero(void);
void debug_stack_reset(void);
#else /* !CONFIG_X86_64 */
/*
 * Without CONFIG_X86_64 every helper is an empty stub and
 * is_debug_stack() reports 0 for any address.
 */
static inline void debug_stack_usage_inc(void)
{
}

static inline void debug_stack_usage_dec(void)
{
}

static inline int is_debug_stack(unsigned long addr)
{
	return 0;
}

static inline void debug_stack_set_zero(void)
{
}

static inline void debug_stack_reset(void)
{
}
#endif /* CONFIG_X86_64 */


#endif	/* __KERNEL__ */

#endif /* _ASM_X86_DEBUGREG_H */
+12 −0
Original line number Diff line number Diff line
@@ -35,6 +35,8 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in

extern struct desc_ptr idt_descr;
extern gate_desc idt_table[];
extern struct desc_ptr nmi_idt_descr;
extern gate_desc nmi_idt_table[];

struct gdt_page {
	struct desc_struct gdt[GDT_ENTRIES];
@@ -307,6 +309,16 @@ static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit)
	desc->limit = (limit >> 16) & 0xf;
}

#ifdef CONFIG_X86_64
/*
 * Install @addr as the handler for vector @gate in nmi_idt_table.
 *
 * The entry is packed as a GATE_INTERRUPT descriptor in __KERNEL_CS. The
 * two zero arguments are presumably dpl and ist, going by the parameter
 * order of _set_gate() -- confirm against pack_gate().
 */
static inline void set_nmi_gate(int gate, void *addr)
{
	unsigned long handler = (unsigned long)addr;
	gate_desc entry;

	pack_gate(&entry, GATE_INTERRUPT, handler, 0, 0, __KERNEL_CS);
	write_idt_entry(nmi_idt_table, gate, &entry);
}
#endif

static inline void _set_gate(int gate, unsigned type, void *addr,
			     unsigned dpl, unsigned ist, unsigned seg)
{
+24 −0
Original line number Diff line number Diff line
@@ -1021,6 +1021,8 @@ __setup("clearcpuid=", setup_disablecpuid);

#ifdef CONFIG_X86_64
/*
 * Descriptor for the regular IDT: initialized with NR_VECTORS * 16 - 1 and
 * the address of idt_table.
 * NOTE(review): the first value is presumably the table limit in bytes, with
 * 16 being the size of one 64-bit gate_desc -- confirm.
 */
struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table };
/* Same initializer shape, but pointing at nmi_idt_table. */
struct desc_ptr nmi_idt_descr = { NR_VECTORS * 16 - 1,
				    (unsigned long) nmi_idt_table };

DEFINE_PER_CPU_FIRST(union irq_stack_union,
		     irq_stack_union) __aligned(PAGE_SIZE);
@@ -1085,6 +1087,26 @@ unsigned long kernel_eflags;
 */
DEFINE_PER_CPU(struct orig_ist, orig_ist);

/*
 * Per-CPU address that cpu_init() records for the DEBUG_STACK IST slot;
 * is_debug_stack() uses it as the inclusive upper bound of the debug stack.
 */
static DEFINE_PER_CPU(unsigned long, debug_stack_addr);
/*
 * Per-CPU counter; while it is non-zero is_debug_stack() returns true for
 * any address.
 */
DEFINE_PER_CPU(int, debug_stack_usage);

/*
 * Report whether @addr should be treated as being on this CPU's debug stack.
 *
 * Returns 1 unconditionally while the per-CPU debug_stack_usage counter is
 * non-zero. Otherwise returns 1 only when @addr lies in the half-open range
 * (debug_stack_addr - DEBUG_STKSZ, debug_stack_addr], and 0 if it does not.
 */
int is_debug_stack(unsigned long addr)
{
	unsigned long top;

	if (__get_cpu_var(debug_stack_usage))
		return 1;

	top = __get_cpu_var(debug_stack_addr);
	return addr <= top && addr > (top - DEBUG_STKSZ);
}

/*
 * Load nmi_idt_descr through load_idt().
 * NOTE(review): the "set_zero" name presumably refers to the gates in
 * nmi_idt_table being packed with a zero IST index (see set_nmi_gate())
 * -- confirm.
 */
void debug_stack_set_zero(void)
{
	const struct desc_ptr *nmi_idt = (const struct desc_ptr *)&nmi_idt_descr;

	load_idt(nmi_idt);
}

/*
 * Load idt_descr through load_idt(), i.e. go back to the regular IDT after
 * debug_stack_set_zero() loaded the NMI one.
 */
void debug_stack_reset(void)
{
	const struct desc_ptr *regular_idt = (const struct desc_ptr *)&idt_descr;

	load_idt(regular_idt);
}

#else	/* CONFIG_X86_64 */

DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
@@ -1212,6 +1234,8 @@ void __cpuinit cpu_init(void)
			estacks += exception_stack_sizes[v];
			oist->ist[v] = t->x86_tss.ist[v] =
					(unsigned long)estacks;
			if (v == DEBUG_STACK-1)
				per_cpu(debug_stack_addr, cpu) = (unsigned long)estacks;
		}
	}

+185 −33
Original line number Diff line number Diff line
@@ -1480,62 +1480,214 @@ ENTRY(error_exit)
	CFI_ENDPROC
END(error_exit)

/*
 * Test if a given stack is an NMI stack or not.
 *
 *  reg:        register name (without the '%') holding the highest address
 *              of the stack to test against. It is clobbered:
 *              EXCEPTION_STKSZ is subtracted from it.
 *  stack:      operand holding the stack pointer value to classify.
 *  nmi_ret:    jump target when reg - EXCEPTION_STKSZ <= stack <= reg.
 *  normal_ret: jump target otherwise.
 *
 * Both comparisons are unsigned (ja/jb). The macro always ends in a jump,
 * so execution never falls through to the instruction that follows it.
 */
	.macro test_in_nmi reg stack nmi_ret normal_ret
	/* stack above the top address: not on this stack */
	cmpq %\reg, \stack
	ja \normal_ret
	/* move reg down to the lowest address of the stack */
	subq $EXCEPTION_STKSZ, %\reg
	/* stack below the bottom address: not on this stack */
	cmpq %\reg, \stack
	jb \normal_ret
	jmp \nmi_ret
	.endm

	/* runs on exception stack */
ENTRY(nmi)
	INTR_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq_cfi $-1
	/*
	 * We allow breakpoints in NMIs. If a breakpoint occurs, then
	 * the iretq it performs will take us out of NMI context.
	 * This means that we can have nested NMIs where the next
	 * NMI is using the top of the stack of the previous NMI. We
	 * can't let it execute because the nested NMI will corrupt the
	 * stack of the previous NMI. NMI handlers are not re-entrant
	 * anyway.
	 *
	 * To handle this case we do the following:
	 *  Check a special location on the stack that contains
	 *  a variable that is set when NMIs are executing.
	 *  The interrupted task's stack is also checked to see if it
	 *  is an NMI stack.
	 *  If the variable is not set and the stack is not the NMI
	 *  stack then:
	 *    o Set the special variable on the stack
	 *    o Copy the interrupt frame into a "saved" location on the stack
	 *    o Copy the interrupt frame into a "copy" location on the stack
	 *    o Continue processing the NMI
	 *  If the variable is set or the previous stack is the NMI stack:
	 *    o Modify the "copy" location to jump to repeat_nmi
	 *    o return back to the first NMI
	 *
	 * Now on exit of the first NMI, we first clear the stack variable.
	 * The NMI stack will tell any nested NMIs at that point that it is
	 * nested. Then we pop the stack normally with iret, and if there was
	 * a nested NMI that updated the copy interrupt stack frame, a
	 * jump will be made to the repeat_nmi code that will handle the second
	 * NMI.
	 */

	/* Use %rdx as our temp variable throughout */
	pushq_cfi %rdx

	/*
	 * Check the special variable on the stack to see if NMIs are
	 * executing.
	 */
	cmp $1, -8(%rsp)
	je nested_nmi

	/*
	 * Now test if the previous stack was an NMI stack.
	 * We need the double check. We check the NMI stack to satisfy the
	 * race when the first NMI clears the variable before returning.
	 * We check the variable because the first NMI could be in a
	 * breakpoint routine using a breakpoint stack.
	 */
	lea 6*8(%rsp), %rdx
	test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi

nested_nmi:
	/*
	 * Do nothing if we interrupted the fixup in repeat_nmi.
	 * It's about to repeat the NMI handler, so we are fine
	 * with ignoring this one.
	 */
	movq $repeat_nmi, %rdx
	cmpq 8(%rsp), %rdx
	ja 1f
	movq $end_repeat_nmi, %rdx
	cmpq 8(%rsp), %rdx
	ja nested_nmi_out

1:
	/* Set up the interrupted NMIs stack to jump to repeat_nmi */
	leaq -6*8(%rsp), %rdx
	movq %rdx, %rsp
	CFI_ADJUST_CFA_OFFSET 6*8
	pushq_cfi $__KERNEL_DS
	pushq_cfi %rdx
	pushfq_cfi
	pushq_cfi $__KERNEL_CS
	pushq_cfi $repeat_nmi

	/* Put stack back */
	addq $(11*8), %rsp
	CFI_ADJUST_CFA_OFFSET -11*8

nested_nmi_out:
	popq_cfi %rdx

	/* No need to check faults here */
	INTERRUPT_RETURN

first_nmi:
	/*
	 * Because nested NMIs will use the pushed location that we
	 * stored in rdx, we must keep that space available.
	 * Here's what our stack frame will look like:
	 * +-------------------------+
	 * | original SS             |
	 * | original Return RSP     |
	 * | original RFLAGS         |
	 * | original CS             |
	 * | original RIP            |
	 * +-------------------------+
	 * | temp storage for rdx    |
	 * +-------------------------+
	 * | NMI executing variable  |
	 * +-------------------------+
	 * | Saved SS                |
	 * | Saved Return RSP        |
	 * | Saved RFLAGS            |
	 * | Saved CS                |
	 * | Saved RIP               |
	 * +-------------------------+
	 * | copied SS               |
	 * | copied Return RSP       |
	 * | copied RFLAGS           |
	 * | copied CS               |
	 * | copied RIP              |
	 * +-------------------------+
	 * | pt_regs                 |
	 * +-------------------------+
	 *
	 * The saved frame is used to fix up the copied frame, which a nested
	 * NMI may overwrite so that the final iret jumps to repeat_nmi. The
	 * original stack frame and the temp storage are also used by nested
	 * NMIs and cannot be trusted on exit.
	 */
	/* Set the NMI executing variable on the stack. */
	pushq_cfi $1

	/* Copy the stack frame to the Saved frame */
	.rept 5
	pushq_cfi 6*8(%rsp)
	.endr

	/* Make another copy, this one may be modified by nested NMIs */
	.rept 5
	pushq_cfi 4*8(%rsp)
	.endr

	/* Do not pop rdx, nested NMIs will corrupt it */
	movq 11*8(%rsp), %rdx

	/*
	 * Everything below this point can be preempted by a nested
	 * NMI if the first NMI took an exception. Repeated NMIs
	 * caused by an exception and nested NMI will start here, and
	 * can still be preempted by another NMI.
	 */
restart_nmi:
	pushq_cfi $-1		/* ORIG_RAX: no syscall to restart */
	subq $ORIG_RAX-R15, %rsp
	CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
	/*
	 * Use save_paranoid to handle SWAPGS, but no need to use paranoid_exit
	 * as we should not be calling schedule in NMI context.
	 * Even with normal interrupts enabled. An NMI should not be
	 * setting NEED_RESCHED or anything that normal interrupts and
	 * exceptions might do.
	 */
	call save_paranoid
	DEFAULT_FRAME 0
	/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
	movq %rsp,%rdi
	movq $-1,%rsi
	call do_nmi
#ifdef CONFIG_TRACE_IRQFLAGS
	/* paranoidexit; without TRACE_IRQS_OFF */
	/* ebx:	no swapgs flag */
	DISABLE_INTERRUPTS(CLBR_NONE)
	testl %ebx,%ebx				/* swapgs needed? */
	jnz nmi_restore
	testl $3,CS(%rsp)
	jnz nmi_userspace
nmi_swapgs:
	SWAPGS_UNSAFE_STACK
nmi_restore:
	RESTORE_ALL 8
	/* Clear the NMI executing stack variable */
	movq $0, 10*8(%rsp)
	jmp irq_return
nmi_userspace:
	GET_THREAD_INFO(%rcx)
	movl TI_flags(%rcx),%ebx
	andl $_TIF_WORK_MASK,%ebx
	jz nmi_swapgs
	movq %rsp,%rdi			/* &pt_regs */
	call sync_regs
	movq %rax,%rsp			/* switch stack for scheduling */
	testl $_TIF_NEED_RESCHED,%ebx
	jnz nmi_schedule
	movl %ebx,%edx			/* arg3: thread flags */
	ENABLE_INTERRUPTS(CLBR_NONE)
	xorl %esi,%esi 			/* arg2: oldset */
	movq %rsp,%rdi 			/* arg1: &pt_regs */
	call do_notify_resume
	DISABLE_INTERRUPTS(CLBR_NONE)
	jmp nmi_userspace
nmi_schedule:
	ENABLE_INTERRUPTS(CLBR_ANY)
	call schedule
	DISABLE_INTERRUPTS(CLBR_ANY)
	jmp nmi_userspace
	CFI_ENDPROC
#else
	jmp paranoid_exit
	CFI_ENDPROC
#endif
END(nmi)

	/*
	 * If an NMI hit an iret because of an exception or breakpoint,
	 * it can lose its NMI context, and a nested NMI may come in.
	 * In that case, the nested NMI will change the preempted NMI's
	 * stack to jump to here when it does the final iret.
	 *
	 * The address range from repeat_nmi up to end_repeat_nmi is what the
	 * nested_nmi path compares the interrupted RIP against: a nested NMI
	 * that lands inside this fixup is ignored, since the handler is about
	 * to be repeated anyway.
	 */
repeat_nmi:
	INTR_FRAME
	/* Update the stack variable to say we are still in NMI */
	movq $1, 5*8(%rsp)

	/*
	 * Copy the saved stack back to the copy stack.
	 * Each push reads from 4*8(%rsp) and then lowers %rsp by 8, so the
	 * five pushes walk down through five consecutive quadwords and
	 * reproduce them in the same order below the source.
	 */
	.rept 5
	pushq_cfi 4*8(%rsp)
	.endr

	/* Re-enter the common handler path with the rebuilt copy frame */
	jmp restart_nmi
	CFI_ENDPROC
end_repeat_nmi:

ENTRY(ignore_sysret)
	CFI_STARTPROC
	mov $-ENOSYS,%eax
Loading