Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 5cc8c2ec authored by Greg Kroah-Hartman
Browse files

Merge 4.4.110 into android-4.4



Changes in 4.4.110
	x86/boot: Add early cmdline parsing for options with arguments
	KAISER: Kernel Address Isolation
	kaiser: merged update
	kaiser: do not set _PAGE_NX on pgd_none
	kaiser: stack map PAGE_SIZE at THREAD_SIZE-PAGE_SIZE
	kaiser: fix build and FIXME in alloc_ldt_struct()
	kaiser: KAISER depends on SMP
	kaiser: fix regs to do_nmi() ifndef CONFIG_KAISER
	kaiser: fix perf crashes
	kaiser: ENOMEM if kaiser_pagetable_walk() NULL
	kaiser: tidied up asm/kaiser.h somewhat
	kaiser: tidied up kaiser_add/remove_mapping slightly
	kaiser: kaiser_remove_mapping() move along the pgd
	kaiser: cleanups while trying for gold link
	kaiser: name that 0x1000 KAISER_SHADOW_PGD_OFFSET
	kaiser: delete KAISER_REAL_SWITCH option
	kaiser: vmstat show NR_KAISERTABLE as nr_overhead
	kaiser: enhanced by kernel and user PCIDs
	kaiser: load_new_mm_cr3() let SWITCH_USER_CR3 flush user
	kaiser: PCID 0 for kernel and 128 for user
	kaiser: x86_cr3_pcid_noflush and x86_cr3_pcid_user
	kaiser: paranoid_entry pass cr3 need to paranoid_exit
	kaiser: _pgd_alloc() without __GFP_REPEAT to avoid stalls
	kaiser: fix unlikely error in alloc_ldt_struct()
	kaiser: add "nokaiser" boot option, using ALTERNATIVE
	x86/kaiser: Rename and simplify X86_FEATURE_KAISER handling
	x86/kaiser: Check boottime cmdline params
	kaiser: use ALTERNATIVE instead of x86_cr3_pcid_noflush
	kaiser: drop is_atomic arg to kaiser_pagetable_walk()
	kaiser: asm/tlbflush.h handle noPGE at lower level
	kaiser: kaiser_flush_tlb_on_return_to_user() check PCID
	x86/paravirt: Dont patch flush_tlb_single
	x86/kaiser: Reenable PARAVIRT
	kaiser: disabled on Xen PV
	x86/kaiser: Move feature detection up
	KPTI: Rename to PAGE_TABLE_ISOLATION
	KPTI: Report when enabled
	x86, vdso, pvclock: Simplify and speed up the vdso pvclock reader
	x86/vdso: Get pvclock data from the vvar VMA instead of the fixmap
	x86/kasan: Clear kasan_zero_page after TLB flush
	kaiser: Set _PAGE_NX only if supported
	Linux 4.4.110

Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
parents a51b8409 b3e3db15
Loading
Loading
Loading
Loading
+8 −0
Original line number Diff line number Diff line
@@ -2529,6 +2529,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.

	nojitter	[IA-64] Disables jitter checking for ITC timers.

	nopti		[X86-64] Disable KAISER isolation of kernel from user.

	no-kvmclock	[X86,KVM] Disable paravirtualized KVM clock driver

	no-kvmapf	[X86,KVM] Disable paravirtualized asynchronous page
@@ -3060,6 +3062,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
	pt.		[PARIDE]
			See Documentation/blockdev/paride.txt.

	pti=		[X86-64]
			Control KAISER user/kernel address space isolation:
			on - enable
			off - disable
			auto - default setting

	pty.legacy_count=
			[KNL] Number of legacy pty's. Overwrites compiled-in
			default number.
+1 −1
Original line number Diff line number Diff line
VERSION = 4
PATCHLEVEL = 4
SUBLEVEL = 109
SUBLEVEL = 110
EXTRAVERSION =
NAME = Blurry Fish Butt

+1 −0
Original line number Diff line number Diff line
@@ -9,6 +9,7 @@
 */
#undef CONFIG_PARAVIRT
#undef CONFIG_PARAVIRT_SPINLOCKS
#undef CONFIG_PAGE_TABLE_ISOLATION
#undef CONFIG_KASAN

#include <linux/linkage.h>
+145 −19
Original line number Diff line number Diff line
@@ -35,6 +35,7 @@
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/pgtable_types.h>
#include <asm/kaiser.h>
#include <linux/err.h>

/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
@@ -135,6 +136,7 @@ ENTRY(entry_SYSCALL_64)
	 * it is too small to ever cause noticeable irq latency.
	 */
	SWAPGS_UNSAFE_STACK
	SWITCH_KERNEL_CR3_NO_STACK
	/*
	 * A hypervisor implementation might want to use a label
	 * after the swapgs, so that it can do the swapgs
@@ -207,9 +209,17 @@ entry_SYSCALL_64_fastpath:
	testl	$_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
	jnz	int_ret_from_sys_call_irqs_off	/* Go to the slow path */

	RESTORE_C_REGS_EXCEPT_RCX_R11
	movq	RIP(%rsp), %rcx
	movq	EFLAGS(%rsp), %r11
	RESTORE_C_REGS_EXCEPT_RCX_R11
	/*
	 * This opens a window where we have a user CR3, but are
	 * running in the kernel.  This makes using the CS
	 * register useless for telling whether or not we need to
	 * switch CR3 in NMIs.  Normal interrupts are OK because
	 * they are off here.
	 */
	SWITCH_USER_CR3
	movq	RSP(%rsp), %rsp
	/*
	 * 64-bit SYSRET restores rip from rcx,
@@ -347,10 +357,26 @@ GLOBAL(int_ret_from_sys_call)
syscall_return_via_sysret:
	/* rcx and r11 are already restored (see code above) */
	RESTORE_C_REGS_EXCEPT_RCX_R11
	/*
	 * This opens a window where we have a user CR3, but are
	 * running in the kernel.  This makes using the CS
	 * register useless for telling whether or not we need to
	 * switch CR3 in NMIs.  Normal interrupts are OK because
	 * they are off here.
	 */
	SWITCH_USER_CR3
	movq	RSP(%rsp), %rsp
	USERGS_SYSRET64

opportunistic_sysret_failed:
	/*
	 * This opens a window where we have a user CR3, but are
	 * running in the kernel.  This makes using the CS
	 * register useless for telling whether or not we need to
	 * switch CR3 in NMIs.  Normal interrupts are OK because
	 * they are off here.
	 */
	SWITCH_USER_CR3
	SWAPGS
	jmp	restore_c_regs_and_iret
END(entry_SYSCALL_64)
@@ -509,6 +535,7 @@ END(irq_entries_start)
	 * tracking that we're in kernel mode.
	 */
	SWAPGS
	SWITCH_KERNEL_CR3

	/*
	 * We need to tell lockdep that IRQs are off.  We can't do this until
@@ -568,6 +595,7 @@ GLOBAL(retint_user)
	mov	%rsp,%rdi
	call	prepare_exit_to_usermode
	TRACE_IRQS_IRETQ
	SWITCH_USER_CR3
	SWAPGS
	jmp	restore_regs_and_iret

@@ -625,6 +653,7 @@ native_irq_return_ldt:
	pushq	%rax
	pushq	%rdi
	SWAPGS
	SWITCH_KERNEL_CR3
	movq	PER_CPU_VAR(espfix_waddr), %rdi
	movq	%rax, (0*8)(%rdi)		/* RAX */
	movq	(2*8)(%rsp), %rax		/* RIP */
@@ -640,6 +669,7 @@ native_irq_return_ldt:
	andl	$0xffff0000, %eax
	popq	%rdi
	orq	PER_CPU_VAR(espfix_stack), %rax
	SWITCH_USER_CR3
	SWAPGS
	movq	%rax, %rsp
	popq	%rax
@@ -1001,7 +1031,11 @@ idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vec
/*
 * Save all registers in pt_regs, and switch gs if needed.
 * Use slow, but surefire "are we in kernel?" check.
 * Return: ebx=0: need swapgs on exit, ebx=1: otherwise
 *
 * Return: ebx=0: needs swapgs but not SWITCH_USER_CR3 in paranoid_exit
 *         ebx=1: needs neither swapgs nor SWITCH_USER_CR3 in paranoid_exit
 *         ebx=2: needs both swapgs and SWITCH_USER_CR3 in paranoid_exit
 *         ebx=3: needs SWITCH_USER_CR3 but not swapgs in paranoid_exit
 */
ENTRY(paranoid_entry)
	cld
@@ -1014,7 +1048,26 @@ ENTRY(paranoid_entry)
	js	1f				/* negative -> in kernel */
	SWAPGS
	xorl	%ebx, %ebx
1:	ret
1:
#ifdef CONFIG_PAGE_TABLE_ISOLATION
	/*
	 * We might have come in between a swapgs and a SWITCH_KERNEL_CR3
	 * on entry, or between a SWITCH_USER_CR3 and a swapgs on exit.
	 * Do a conditional SWITCH_KERNEL_CR3: this could safely be done
	 * unconditionally, but we need to find out whether the reverse
	 * should be done on return (conveyed to paranoid_exit in %ebx).
	 */
	ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER
	testl	$KAISER_SHADOW_PGD_OFFSET, %eax
	jz	2f
	orl	$2, %ebx
	andq	$(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
	/* If PCID enabled, set X86_CR3_PCID_NOFLUSH_BIT */
	ALTERNATIVE "", "bts $63, %rax", X86_FEATURE_PCID
	movq	%rax, %cr3
2:
#endif
	ret
END(paranoid_entry)

/*
@@ -1027,19 +1080,26 @@ END(paranoid_entry)
 * be complicated.  Fortunately, there's no good reason
 * to try to handle preemption here.
 *
 * On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it)
 * On entry: ebx=0: needs swapgs but not SWITCH_USER_CR3
 *           ebx=1: needs neither swapgs nor SWITCH_USER_CR3
 *           ebx=2: needs both swapgs and SWITCH_USER_CR3
 *           ebx=3: needs SWITCH_USER_CR3 but not swapgs
 */
ENTRY(paranoid_exit)
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF_DEBUG
	testl	%ebx, %ebx			/* swapgs needed? */
	TRACE_IRQS_IRETQ_DEBUG
#ifdef CONFIG_PAGE_TABLE_ISOLATION
	/* No ALTERNATIVE for X86_FEATURE_KAISER: paranoid_entry sets %ebx */
	testl	$2, %ebx			/* SWITCH_USER_CR3 needed? */
	jz	paranoid_exit_no_switch
	SWITCH_USER_CR3
paranoid_exit_no_switch:
#endif
	testl	$1, %ebx			/* swapgs needed? */
	jnz	paranoid_exit_no_swapgs
	TRACE_IRQS_IRETQ
	SWAPGS_UNSAFE_STACK
	jmp	paranoid_exit_restore
paranoid_exit_no_swapgs:
	TRACE_IRQS_IRETQ_DEBUG
paranoid_exit_restore:
	RESTORE_EXTRA_REGS
	RESTORE_C_REGS
	REMOVE_PT_GPREGS_FROM_STACK 8
@@ -1054,6 +1114,13 @@ ENTRY(error_entry)
	cld
	SAVE_C_REGS 8
	SAVE_EXTRA_REGS 8
	/*
	 * error_entry() always returns with a kernel gsbase and
	 * CR3.  We must also have a kernel CR3/gsbase before
	 * calling TRACE_IRQS_*.  Just unconditionally switch to
	 * the kernel CR3 here.
	 */
	SWITCH_KERNEL_CR3
	xorl	%ebx, %ebx
	testb	$3, CS+8(%rsp)
	jz	.Lerror_kernelspace
@@ -1216,6 +1283,10 @@ ENTRY(nmi)
	 */

	SWAPGS_UNSAFE_STACK
	/*
	 * percpu variables are mapped with user CR3, so no need
	 * to switch CR3 here.
	 */
	cld
	movq	%rsp, %rdx
	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
@@ -1249,12 +1320,34 @@ ENTRY(nmi)

	movq	%rsp, %rdi
	movq	$-1, %rsi
#ifdef CONFIG_PAGE_TABLE_ISOLATION
	/* Unconditionally use kernel CR3 for do_nmi() */
	/* %rax is saved above, so OK to clobber here */
	ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER
	/* If PCID enabled, NOFLUSH now and NOFLUSH on return */
	ALTERNATIVE "", "bts $63, %rax", X86_FEATURE_PCID
	pushq	%rax
	/* mask off "user" bit of pgd address and 12 PCID bits: */
	andq	$(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
	movq	%rax, %cr3
2:
#endif
	call	do_nmi

#ifdef CONFIG_PAGE_TABLE_ISOLATION
	/*
	 * Unconditionally restore CR3.  I know we return to
	 * kernel code that needs user CR3, but do we ever return
	 * to "user mode" where we need the kernel CR3?
	 */
	ALTERNATIVE "", "popq %rax; movq %rax, %cr3", X86_FEATURE_KAISER
#endif

	/*
	 * Return back to user mode.  We must *not* do the normal exit
	 * work, because we don't want to enable interrupts.  Fortunately,
	 * do_nmi doesn't modify pt_regs.
	 * work, because we don't want to enable interrupts.  Do not
	 * switch to user CR3: we might be going back to kernel code
	 * that had a user CR3 set.
	 */
	SWAPGS
	jmp	restore_c_regs_and_iret
@@ -1451,22 +1544,55 @@ end_repeat_nmi:
	ALLOC_PT_GPREGS_ON_STACK

	/*
	 * Use paranoid_entry to handle SWAPGS, but no need to use paranoid_exit
	 * as we should not be calling schedule in NMI context.
	 * Even with normal interrupts enabled. An NMI should not be
	 * setting NEED_RESCHED or anything that normal interrupts and
	 * exceptions might do.
	 * Use the same approach as paranoid_entry to handle SWAPGS, but
	 * without CR3 handling since we do that differently in NMIs.  No
	 * need to use paranoid_exit as we should not be calling schedule
	 * in NMI context, even with normal interrupts enabled.  An NMI
	 * should not be setting NEED_RESCHED or anything that normal
	 * interrupts and exceptions might do.
	 */
	call	paranoid_entry

	/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
	cld
	SAVE_C_REGS
	SAVE_EXTRA_REGS
	movl	$1, %ebx
	movl	$MSR_GS_BASE, %ecx
	rdmsr
	testl	%edx, %edx
	js	1f				/* negative -> in kernel */
	SWAPGS
	xorl	%ebx, %ebx
1:
	movq	%rsp, %rdi
	movq	$-1, %rsi
#ifdef CONFIG_PAGE_TABLE_ISOLATION
	/* Unconditionally use kernel CR3 for do_nmi() */
	/* %rax is saved above, so OK to clobber here */
	ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER
	/* If PCID enabled, NOFLUSH now and NOFLUSH on return */
	ALTERNATIVE "", "bts $63, %rax", X86_FEATURE_PCID
	pushq	%rax
	/* mask off "user" bit of pgd address and 12 PCID bits: */
	andq	$(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
	movq	%rax, %cr3
2:
#endif

	/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
	call	do_nmi

#ifdef CONFIG_PAGE_TABLE_ISOLATION
	/*
	 * Unconditionally restore CR3.  We might be returning to
	 * kernel code that needs user CR3, like just before
	 * a sysret.
	 */
	ALTERNATIVE "", "popq %rax; movq %rax, %cr3", X86_FEATURE_KAISER
#endif

	testl	%ebx, %ebx			/* swapgs needed? */
	jnz	nmi_restore
nmi_swapgs:
	/* We fixed up CR3 above, so no need to switch it here */
	SWAPGS_UNSAFE_STACK
nmi_restore:
	RESTORE_EXTRA_REGS
+7 −0
Original line number Diff line number Diff line
@@ -13,6 +13,8 @@
#include <asm/irqflags.h>
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/pgtable_types.h>
#include <asm/kaiser.h>
#include <linux/linkage.h>
#include <linux/err.h>

@@ -50,6 +52,7 @@ ENDPROC(native_usergs_sysret32)
ENTRY(entry_SYSENTER_compat)
	/* Interrupts are off on entry. */
	SWAPGS_UNSAFE_STACK
	SWITCH_KERNEL_CR3_NO_STACK
	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp

	/*
@@ -161,6 +164,7 @@ ENDPROC(entry_SYSENTER_compat)
ENTRY(entry_SYSCALL_compat)
	/* Interrupts are off on entry. */
	SWAPGS_UNSAFE_STACK
	SWITCH_KERNEL_CR3_NO_STACK

	/* Stash user ESP and switch to the kernel stack. */
	movl	%esp, %r8d
@@ -208,6 +212,7 @@ ENTRY(entry_SYSCALL_compat)
	/* Opportunistic SYSRET */
sysret32_from_system_call:
	TRACE_IRQS_ON			/* User mode traces as IRQs on. */
	SWITCH_USER_CR3
	movq	RBX(%rsp), %rbx		/* pt_regs->rbx */
	movq	RBP(%rsp), %rbp		/* pt_regs->rbp */
	movq	EFLAGS(%rsp), %r11	/* pt_regs->flags (in r11) */
@@ -269,6 +274,7 @@ ENTRY(entry_INT80_compat)
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	ASM_CLAC			/* Do this early to minimize exposure */
	SWAPGS
	SWITCH_KERNEL_CR3_NO_STACK

	/*
	 * User tracing code (ptrace or signal handlers) might assume that
@@ -311,6 +317,7 @@ ENTRY(entry_INT80_compat)

	/* Go back to user mode. */
	TRACE_IRQS_ON
	SWITCH_USER_CR3
	SWAPGS
	jmp	restore_regs_and_iret
END(entry_INT80_compat)
Loading