arch/x86/include/asm/debugreg.h  +22 −0

@@ -101,6 +101,28 @@ extern void aout_dump_debugregs(struct user *dump);

extern void hw_breakpoint_restore(void);

#ifdef CONFIG_X86_64
DECLARE_PER_CPU(int, debug_stack_usage);
static inline void debug_stack_usage_inc(void)
{
	__get_cpu_var(debug_stack_usage)++;
}
static inline void debug_stack_usage_dec(void)
{
	__get_cpu_var(debug_stack_usage)--;
}
int is_debug_stack(unsigned long addr);
void debug_stack_set_zero(void);
void debug_stack_reset(void);
#else /* !X86_64 */
static inline int is_debug_stack(unsigned long addr) { return 0; }
static inline void debug_stack_set_zero(void) { }
static inline void debug_stack_reset(void) { }
static inline void debug_stack_usage_inc(void) { }
static inline void debug_stack_usage_dec(void) { }
#endif /* X86_64 */

#endif /* __KERNEL__ */

#endif /* _ASM_X86_DEBUGREG_H */

arch/x86/include/asm/desc.h  +12 −0

@@ -35,6 +35,8 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in

extern struct desc_ptr idt_descr;
extern gate_desc idt_table[];
extern struct desc_ptr nmi_idt_descr;
extern gate_desc nmi_idt_table[];

struct gdt_page {
	struct desc_struct gdt[GDT_ENTRIES];

@@ -307,6 +309,16 @@ static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit)
	desc->limit = (limit >> 16) & 0xf;
}

#ifdef CONFIG_X86_64
static inline void set_nmi_gate(int gate, void *addr)
{
	gate_desc s;

	pack_gate(&s, GATE_INTERRUPT, (unsigned long)addr, 0, 0, __KERNEL_CS);
	write_idt_entry(nmi_idt_table, gate, &s);
}
#endif

static inline void _set_gate(int gate, unsigned type, void *addr,
			     unsigned dpl, unsigned ist, unsigned seg)
{

arch/x86/kernel/cpu/common.c  +24 −0

@@ -1026,6 +1026,8 @@ __setup("clearcpuid=", setup_disablecpuid);

#ifdef CONFIG_X86_64
struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table };
struct desc_ptr nmi_idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) nmi_idt_table };

DEFINE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __aligned(PAGE_SIZE);

@@ -1090,6 +1092,26 @@ unsigned long kernel_eflags;
 */
DEFINE_PER_CPU(struct orig_ist, orig_ist);

static DEFINE_PER_CPU(unsigned long, debug_stack_addr);
DEFINE_PER_CPU(int, debug_stack_usage);

int is_debug_stack(unsigned long addr)
{
	return __get_cpu_var(debug_stack_usage) ||
		(addr <= __get_cpu_var(debug_stack_addr) &&
		 addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ));
}

void debug_stack_set_zero(void)
{
	load_idt((const struct desc_ptr *)&nmi_idt_descr);
}

void debug_stack_reset(void)
{
	load_idt((const struct desc_ptr *)&idt_descr);
}

#else /* CONFIG_X86_64 */

DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;

@@ -1208,6 +1230,8 @@ void __cpuinit cpu_init(void)
			estacks += exception_stack_sizes[v];
			oist->ist[v] = t->x86_tss.ist[v] =
					(unsigned long)estacks;
			if (v == DEBUG_STACK-1)
				per_cpu(debug_stack_addr, cpu) = (unsigned long)estacks;
		}
	}
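None of the new helpers above are called from within this hunk; the call sites land elsewhere in the series (presumably the breakpoint handlers bump debug_stack_usage around their use of the debug stack, and the NMI C code consults is_debug_stack()). A hedged sketch of what such an NMI-side call site could look like — the names nmi_nesting_preprocess, nmi_nesting_postprocess and update_debug_stack are illustrative, not part of this diff:

	/* Illustrative only: how an NMI entry/exit path could use the new helpers. */
	static DEFINE_PER_CPU(int, update_debug_stack);	/* hypothetical flag */

	static inline void nmi_nesting_preprocess(struct pt_regs *regs)
	{
		/*
		 * If the NMI interrupted code running on the per-CPU debug (IST)
		 * stack, switch to the NMI IDT (whose debug gates use IST 0, see
		 * set_nmi_gate() above) so that a breakpoint taken inside the NMI
		 * handler does not reuse and corrupt that stack.
		 */
		if (unlikely(is_debug_stack(regs->sp))) {
			debug_stack_set_zero();
			__get_cpu_var(update_debug_stack) = 1;
		}
	}

	static inline void nmi_nesting_postprocess(void)
	{
		if (unlikely(__get_cpu_var(update_debug_stack)))
			debug_stack_reset();		/* back to the normal IDT */
	}

Likewise, the debug_stack_usage_inc()/debug_stack_usage_dec() pair would presumably wrap the breakpoint handlers' use of the debug stack, which is what makes the debug_stack_usage term of is_debug_stack() meaningful.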
arch/x86/kernel/entry_64.S  +185 −33

@@ -1475,62 +1475,214 @@ ENTRY(error_exit)
	CFI_ENDPROC
END(error_exit)

/*
 * Test if a given stack is an NMI stack or not.
 */
	.macro test_in_nmi reg stack nmi_ret normal_ret
	cmpq %\reg, \stack
	ja \normal_ret
	subq $EXCEPTION_STKSZ, %\reg
	cmpq %\reg, \stack
	jb \normal_ret
	jmp \nmi_ret
	.endm

	/* runs on exception stack */
ENTRY(nmi)
	INTR_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq_cfi $-1
	/*
	 * We allow breakpoints in NMIs. If a breakpoint occurs, then
	 * the iretq it performs will take us out of NMI context.
	 * This means that we can have nested NMIs where the next
	 * NMI is using the top of the stack of the previous NMI. We
	 * can't let it execute because the nested NMI will corrupt the
	 * stack of the previous NMI. NMI handlers are not re-entrant
	 * anyway.
	 *
	 * To handle this case we do the following:
	 *  Check a special location on the stack that contains
	 *  a variable that is set when NMIs are executing.
	 *  The interrupted task's stack is also checked to see if it
	 *  is an NMI stack.
	 *  If the variable is not set and the stack is not the NMI
	 *  stack then:
	 *    o Set the special variable on the stack
	 *    o Copy the interrupt frame into a "saved" location on the stack
	 *    o Copy the interrupt frame into a "copy" location on the stack
	 *    o Continue processing the NMI
	 *  If the variable is set or the previous stack is the NMI stack:
	 *    o Modify the "copy" location to jump to repeat_nmi
	 *    o Return back to the first NMI
	 *
	 * Now on exit of the first NMI, we first clear the stack variable.
	 * The NMI stack will tell any nested NMIs at that point that it is
	 * nested. Then we pop the stack normally with iret, and if there was
	 * a nested NMI that updated the copy interrupt stack frame, a
	 * jump will be made to the repeat_nmi code that will handle the second
	 * NMI.
	 */

	/* Use %rdx as our temp variable throughout */
	pushq_cfi %rdx

	/*
	 * Check the special variable on the stack to see if NMIs are
	 * executing.
	 */
	cmp $1, -8(%rsp)
	je nested_nmi

	/*
	 * Now test if the previous stack was an NMI stack.
	 * We need the double check. We check the NMI stack to satisfy the
	 * race when the first NMI clears the variable before returning.
	 * We check the variable because the first NMI could be in a
	 * breakpoint routine using a breakpoint stack.
	 */
	lea 6*8(%rsp), %rdx
	test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi

nested_nmi:
	/*
	 * Do nothing if we interrupted the fixup in repeat_nmi.
	 * It's about to repeat the NMI handler, so we are fine
	 * with ignoring this one.
	 */
	movq $repeat_nmi, %rdx
	cmpq 8(%rsp), %rdx
	ja 1f
	movq $end_repeat_nmi, %rdx
	cmpq 8(%rsp), %rdx
	ja nested_nmi_out

1:
	/* Set up the interrupted NMI's stack to jump to repeat_nmi */
	leaq -6*8(%rsp), %rdx
	movq %rdx, %rsp
	CFI_ADJUST_CFA_OFFSET 6*8
	pushq_cfi $__KERNEL_DS
	pushq_cfi %rdx
	pushfq_cfi
	pushq_cfi $__KERNEL_CS
	pushq_cfi $repeat_nmi

	/* Put stack back */
	addq $(11*8), %rsp
	CFI_ADJUST_CFA_OFFSET -11*8

nested_nmi_out:
	popq_cfi %rdx

	/* No need to check faults here */
	INTERRUPT_RETURN
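The nesting decision above boils down to a flag test plus the test_in_nmi range check on the interrupted stack pointer. A minimal C model of that decision (the function and parameter names are invented for illustration; the real test is done in assembly directly against the NMI stack):

	#include <stdbool.h>

	/*
	 * A nested NMI is assumed either when the on-stack "NMI executing"
	 * variable is set, or when the interrupted RSP already points into
	 * the NMI stack (the test_in_nmi range check).
	 */
	static bool nmi_is_nested(bool nmi_executing, unsigned long interrupted_sp,
				  unsigned long nmi_stack_top, unsigned long stack_size)
	{
		bool on_nmi_stack = interrupted_sp <= nmi_stack_top &&
				    interrupted_sp >= nmi_stack_top - stack_size;

		return nmi_executing || on_nmi_stack;
	}

Both tests are needed: the stack test covers the window after the first NMI has already cleared the variable on its way out, and the variable covers the case where the first NMI is currently running on a breakpoint (IST) stack rather than the NMI stack.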
first_nmi:
	/*
	 * Because nested NMIs will use the pushed location that we
	 * stored in rdx, we must keep that space available.
	 * Here's what our stack frame will look like:
	 * +-------------------------+
	 * | original SS             |
	 * | original Return RSP     |
	 * | original RFLAGS         |
	 * | original CS             |
	 * | original RIP            |
	 * +-------------------------+
	 * | temp storage for rdx    |
	 * +-------------------------+
	 * | NMI executing variable  |
	 * +-------------------------+
	 * | Saved SS                |
	 * | Saved Return RSP        |
	 * | Saved RFLAGS            |
	 * | Saved CS                |
	 * | Saved RIP               |
	 * +-------------------------+
	 * | copied SS               |
	 * | copied Return RSP       |
	 * | copied RFLAGS           |
	 * | copied CS               |
	 * | copied RIP              |
	 * +-------------------------+
	 * | pt_regs                 |
	 * +-------------------------+
	 *
	 * The saved RIP is used to fix up the copied RIP that a nested
	 * NMI may zero out. The original stack frame and the temp storage
	 * are also used by nested NMIs and cannot be trusted on exit.
	 */
	/* Set the NMI executing variable on the stack. */
	pushq_cfi $1

	/* Copy the stack frame to the Saved frame */
	.rept 5
	pushq_cfi 6*8(%rsp)
	.endr

	/* Make another copy, this one may be modified by nested NMIs */
	.rept 5
	pushq_cfi 4*8(%rsp)
	.endr

	/* Do not pop rdx, nested NMIs will corrupt it */
	movq 11*8(%rsp), %rdx

	/*
	 * Everything below this point can be preempted by a nested
	 * NMI if the first NMI took an exception. Repeated NMIs
	 * caused by an exception and nested NMI will start here, and
	 * can still be preempted by another NMI.
	 */
restart_nmi:
	pushq_cfi $-1		/* ORIG_RAX: no syscall to restart */
	subq $ORIG_RAX-R15, %rsp
	CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
	/*
	 * Use save_paranoid to handle SWAPGS, but no need to use paranoid_exit
	 * as we should not be calling schedule in NMI context,
	 * even with normal interrupts enabled. An NMI should not be
	 * setting NEED_RESCHED or anything that normal interrupts and
	 * exceptions might do.
	 */
	call save_paranoid
	DEFAULT_FRAME 0
	/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
	movq %rsp,%rdi
	movq $-1,%rsi
	call do_nmi
#ifdef CONFIG_TRACE_IRQFLAGS
	/* paranoidexit; without TRACE_IRQS_OFF */
	/* ebx: no swapgs flag */
	DISABLE_INTERRUPTS(CLBR_NONE)
	testl %ebx,%ebx				/* swapgs needed? */
	jnz nmi_restore
	testl $3,CS(%rsp)
	jnz nmi_userspace
nmi_swapgs:
	SWAPGS_UNSAFE_STACK
nmi_restore:
	RESTORE_ALL 8
	/* Clear the NMI executing stack variable */
	movq $0, 10*8(%rsp)
	jmp irq_return
nmi_userspace:
	GET_THREAD_INFO(%rcx)
	movl TI_flags(%rcx),%ebx
	andl $_TIF_WORK_MASK,%ebx
	jz nmi_swapgs
	movq %rsp,%rdi			/* &pt_regs */
	call sync_regs
	movq %rax,%rsp			/* switch stack for scheduling */
	testl $_TIF_NEED_RESCHED,%ebx
	jnz nmi_schedule
	movl %ebx,%edx			/* arg3: thread flags */
	ENABLE_INTERRUPTS(CLBR_NONE)
	xorl %esi,%esi			/* arg2: oldset */
	movq %rsp,%rdi			/* arg1: &pt_regs */
	call do_notify_resume
	DISABLE_INTERRUPTS(CLBR_NONE)
	jmp nmi_userspace
nmi_schedule:
	ENABLE_INTERRUPTS(CLBR_ANY)
	call schedule
	DISABLE_INTERRUPTS(CLBR_ANY)
	jmp nmi_userspace
	CFI_ENDPROC
#else
	jmp paranoid_exit
	CFI_ENDPROC
#endif
END(nmi)

	/*
	 * If an NMI hit an iret because of an exception or breakpoint,
	 * it can lose its NMI context, and a nested NMI may come in.
	 * In that case, the nested NMI will change the preempted NMI's
	 * stack to jump to here when it does the final iret.
	 */
repeat_nmi:
	INTR_FRAME
	/* Update the stack variable to say we are still in NMI */
	movq $1, 5*8(%rsp)

	/* copy the saved stack back to copy stack */
	.rept 5
	pushq_cfi 4*8(%rsp)
	.endr

	jmp restart_nmi
	CFI_ENDPROC
end_repeat_nmi:

ENTRY(ignore_sysret)
	CFI_STARTPROC
	mov $-ENOSYS,%eax
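Taken together, the first_nmi, nested_nmi, repeat_nmi and exit paths above implement a small state machine over the "NMI executing" variable and the two iret frames. A user-space C model of that protocol (purely illustrative; the struct and function names are invented here, and the real nested_nmi path rewrites the whole copied frame rather than just the RIP slot):

	#include <stdbool.h>

	struct iret_frame { unsigned long rip, cs, rflags, rsp, ss; };

	struct nmi_stack_model {
		bool executing;			/* the "NMI executing" variable     */
		struct iret_frame saved;	/* pristine copy of the entry frame */
		struct iret_frame copied;	/* frame the final iret will use    */
	};

	/* first_nmi: mark the NMI in progress and make both copies of the frame */
	static void model_first_nmi(struct nmi_stack_model *s, struct iret_frame entry)
	{
		s->executing = true;
		s->saved = entry;
		s->copied = entry;
	}

	/* nested_nmi: redirect the preempted NMI's final iret to repeat_nmi */
	static void model_nested_nmi(struct nmi_stack_model *s, unsigned long repeat_nmi_rip)
	{
		s->copied.rip = repeat_nmi_rip;
	}

	/* repeat_nmi: repair the copied frame from the saved one, stay "executing" */
	static void model_repeat_nmi(struct nmi_stack_model *s)
	{
		s->executing = true;
		s->copied = s->saved;
	}

	/* exit path: clear the variable, then "iret" through whatever copied.rip holds */
	static unsigned long model_nmi_exit(struct nmi_stack_model *s)
	{
		s->executing = false;
		return s->copied.rip;
	}

In other words, the final iret of the first NMI either returns to the originally interrupted code or, if a nested NMI slipped in, lands on repeat_nmi, which restores the copied frame and reruns the handler.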
arch/x86/kernel/head_64.S  +4 −0

@@ -417,6 +417,10 @@ ENTRY(phys_base)

ENTRY(idt_table)
	.skip IDT_ENTRIES * 16

	.align L1_CACHE_BYTES
ENTRY(nmi_idt_table)
	.skip IDT_ENTRIES * 16

	__PAGE_ALIGNED_BSS
	.align PAGE_SIZE
ENTRY(empty_zero_page)
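head_64.S only reserves storage for nmi_idt_table; nothing in this diff populates it. Presumably the table is filled in at boot along the lines of the following sketch — the trap_init() placement and the choice of vectors 1 (#DB) and 3 (#BP) are assumptions, not shown in these hunks:

	#ifdef CONFIG_X86_64
		/*
		 * Illustrative sketch: start from a copy of the regular IDT, then
		 * replace the debug and breakpoint gates with entries whose IST is 0
		 * (see set_nmi_gate() above), so that breakpoints taken while the NMI
		 * IDT is loaded do not switch onto the per-CPU debug stack.
		 */
		memcpy(&nmi_idt_table, &idt_table, IDT_ENTRIES * 16);
		set_nmi_gate(1, &debug);	/* #DB */
		set_nmi_gate(3, &int3);		/* #BP */
	#endif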