Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 8bbe0dec authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull more KVM updates from Paolo Bonzini:
 "x86 KVM changes:

   - The usual accuracy improvements for nested virtualization

   - The usual round of code cleanups from Sean

   - Added back optimizations that were prematurely removed in 5.2 (the
     bare minimum needed to fix the regression was in 5.3-rc8, here
     comes the rest)

   - Support for UMWAIT/UMONITOR/TPAUSE

   - Direct L2->L0 TLB flushing when L0 is Hyper-V and L1 is KVM

   - Tell Windows guests if SMT is disabled on the host

   - More accurate detection of vmexit cost

   - Revert a pvqspinlock pessimization"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (56 commits)
  KVM: nVMX: cleanup and fix host 64-bit mode checks
  KVM: vmx: fix build warnings in hv_enable_direct_tlbflush() on i386
  KVM: x86: Don't check kvm_rebooting in __kvm_handle_fault_on_reboot()
  KVM: x86: Drop ____kvm_handle_fault_on_reboot()
  KVM: VMX: Add error handling to VMREAD helper
  KVM: VMX: Optimize VMX instruction error and fault handling
  KVM: x86: Check kvm_rebooting in kvm_spurious_fault()
  KVM: selftests: fix ucall on x86
  Revert "locking/pvqspinlock: Don't wait if vCPU is preempted"
  kvm: nvmx: limit atomic switch MSRs
  kvm: svm: Intercept RDPRU
  kvm: x86: Add "significant index" flag to a few CPUID leaves
  KVM: x86/mmu: Skip invalid pages during zapping iff root_count is zero
  KVM: x86/mmu: Explicitly track only a single invalid mmu generation
  KVM: x86/mmu: Revert "KVM: x86/mmu: Remove is_obsolete() call"
  KVM: x86/mmu: Revert "Revert "KVM: MMU: reclaim the zapped-obsolete page first""
  KVM: x86/mmu: Revert "Revert "KVM: MMU: collapse TLB flushes when zap all pages""
  KVM: x86/mmu: Revert "Revert "KVM: MMU: zap pages in batch""
  KVM: x86/mmu: Revert "Revert "KVM: MMU: add tracepoint for kvm_mmu_invalidate_all_pages""
  KVM: x86/mmu: Revert "Revert "KVM: MMU: show mmu_valid_gen in shadow page related tracepoints""
  ...
parents e37e3bc7 fd3edd4a
Loading
Loading
Loading
Loading
+13 −0
Original line number Diff line number Diff line
@@ -5309,3 +5309,16 @@ Architectures: x86
This capability indicates that KVM supports paravirtualized Hyper-V IPI send
hypercalls:
HvCallSendSyntheticClusterIpi, HvCallSendSyntheticClusterIpiEx.
8.21 KVM_CAP_HYPERV_DIRECT_TLBFLUSH

Architecture: x86

This capability indicates that KVM running on top of Hyper-V hypervisor
enables Direct TLB flush for its guests meaning that TLB flush
hypercalls are handled by Level 0 hypervisor (Hyper-V) bypassing KVM.
Due to the different ABI for hypercall parameters between Hyper-V and
KVM, enabling this capability effectively disables all hypercall
handling by KVM (as some KVM hypercall may be mistakenly treated as TLB
flush hypercalls by Hyper-V) so userspace should disable KVM identification
in CPUID and only exposes Hyper-V identification. In this case, guest
thinks it's running on Hyper-V and only use Hyper-V hypercalls.
+26 −5
Original line number Diff line number Diff line
@@ -180,7 +180,15 @@
/* Recommend using enlightened VMCS */
#define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED		BIT(14)

/*
 * Virtual processor will never share a physical core with another virtual
 * processor, except for virtual processors that are reported as sibling SMT
 * threads.
 */
#define HV_X64_NO_NONARCH_CORESHARING                  BIT(18)

/* Nested features. These are HYPERV_CPUID_NESTED_FEATURES.EAX bits. */
#define HV_X64_NESTED_DIRECT_FLUSH			BIT(17)
#define HV_X64_NESTED_GUEST_MAPPING_FLUSH		BIT(18)
#define HV_X64_NESTED_MSR_BITMAP			BIT(19)

@@ -524,14 +532,24 @@ struct hv_timer_message_payload {
	__u64 delivery_time;	/* When the message was delivered */
} __packed;

struct hv_nested_enlightenments_control {
	struct {
		__u32 directhypercall:1;
		__u32 reserved:31;
	} features;
	struct {
		__u32 reserved;
	} hypercallControls;
} __packed;

/* Define virtual processor assist page structure. */
struct hv_vp_assist_page {
	__u32 apic_assist;
	__u32 reserved;
	__u64 vtl_control[2];
	__u64 nested_enlightenments_control[2];
	__u32 enlighten_vmentry;
	__u32 padding;
	__u32 reserved1;
	__u64 vtl_control[3];
	struct hv_nested_enlightenments_control nested_control;
	__u8 enlighten_vmentry;
	__u8 reserved2[7];
	__u64 current_nested_vmcs;
} __packed;

@@ -882,4 +900,7 @@ struct hv_tlb_flush_ex {
	u64 gva_list[];
} __packed;

struct hv_partition_assist_pg {
	u32 tlb_lock_count;
};
#endif
+41 −23
Original line number Diff line number Diff line
@@ -320,6 +320,7 @@ struct kvm_mmu_page {
	struct list_head link;
	struct hlist_node hash_link;
	bool unsync;
	u8 mmu_valid_gen;
	bool mmio_cached;

	/*
@@ -335,7 +336,6 @@ struct kvm_mmu_page {
	int root_count;          /* Currently serving as active root */
	unsigned int unsync_children;
	struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */
	unsigned long mmu_valid_gen;
	DECLARE_BITMAP(unsync_child_bitmap, 512);

#ifdef CONFIG_X86_32
@@ -844,6 +844,8 @@ struct kvm_hv {

	/* How many vCPUs have VP index != vCPU index */
	atomic_t num_mismatched_vp_indexes;

	struct hv_partition_assist_pg *hv_pa_pg;
};

enum kvm_irqchip_mode {
@@ -857,12 +859,13 @@ struct kvm_arch {
	unsigned long n_requested_mmu_pages;
	unsigned long n_max_mmu_pages;
	unsigned int indirect_shadow_pages;
	unsigned long mmu_valid_gen;
	u8 mmu_valid_gen;
	struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
	/*
	 * Hash table of struct kvm_mmu_page.
	 */
	struct list_head active_mmu_pages;
	struct list_head zapped_obsolete_pages;
	struct kvm_page_track_notifier_node mmu_sp_tracker;
	struct kvm_page_track_notifier_head track_notifier_head;

@@ -1213,6 +1216,7 @@ struct kvm_x86_ops {
	bool (*need_emulation_on_page_fault)(struct kvm_vcpu *vcpu);

	bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu);
	int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu);
};

struct kvm_arch_async_pf {
@@ -1312,18 +1316,42 @@ extern u64 kvm_default_tsc_scaling_ratio;

extern u64 kvm_mce_cap_supported;

enum emulation_result {
	EMULATE_DONE,         /* no further processing */
	EMULATE_USER_EXIT,    /* kvm_run ready for userspace exit */
	EMULATE_FAIL,         /* can't emulate this instruction */
};

/*
 * EMULTYPE_NO_DECODE - Set when re-emulating an instruction (after completing
 *			userspace I/O) to indicate that the emulation context
 *			should be resued as is, i.e. skip initialization of
 *			emulation context, instruction fetch and decode.
 *
 * EMULTYPE_TRAP_UD - Set when emulating an intercepted #UD from hardware.
 *		      Indicates that only select instructions (tagged with
 *		      EmulateOnUD) should be emulated (to minimize the emulator
 *		      attack surface).  See also EMULTYPE_TRAP_UD_FORCED.
 *
 * EMULTYPE_SKIP - Set when emulating solely to skip an instruction, i.e. to
 *		   decode the instruction length.  For use *only* by
 *		   kvm_x86_ops->skip_emulated_instruction() implementations.
 *
 * EMULTYPE_ALLOW_RETRY - Set when the emulator should resume the guest to
 *			  retry native execution under certain conditions.
 *
 * EMULTYPE_TRAP_UD_FORCED - Set when emulating an intercepted #UD that was
 *			     triggered by KVM's magic "force emulation" prefix,
 *			     which is opt in via module param (off by default).
 *			     Bypasses EmulateOnUD restriction despite emulating
 *			     due to an intercepted #UD (see EMULTYPE_TRAP_UD).
 *			     Used to test the full emulator from userspace.
 *
 * EMULTYPE_VMWARE_GP - Set when emulating an intercepted #GP for VMware
 *			backdoor emulation, which is opt in via module param.
 *			VMware backoor emulation handles select instructions
 *			and reinjects the #GP for all other cases.
 */
#define EMULTYPE_NO_DECODE	    (1 << 0)
#define EMULTYPE_TRAP_UD	    (1 << 1)
#define EMULTYPE_SKIP		    (1 << 2)
#define EMULTYPE_ALLOW_RETRY	    (1 << 3)
#define EMULTYPE_NO_UD_ON_FAIL	    (1 << 4)
#define EMULTYPE_VMWARE		    (1 << 5)
#define EMULTYPE_TRAP_UD_FORCED	    (1 << 4)
#define EMULTYPE_VMWARE_GP	    (1 << 5)
int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type);
int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
					void *insn, int insn_len);
@@ -1506,7 +1534,7 @@ enum {
#define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0)
#define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm)

asmlinkage void __noreturn kvm_spurious_fault(void);
asmlinkage void kvm_spurious_fault(void);

/*
 * Hardware virtualization extension instructions may fault if a
@@ -1514,24 +1542,14 @@ asmlinkage void __noreturn kvm_spurious_fault(void);
 * Usually after catching the fault we just panic; during reboot
 * instead the instruction is ignored.
 */
#define ____kvm_handle_fault_on_reboot(insn, cleanup_insn)		\
#define __kvm_handle_fault_on_reboot(insn)				\
	"666: \n\t"							\
	insn "\n\t"							\
	"jmp	668f \n\t"						\
	"667: \n\t"							\
	"call	kvm_spurious_fault \n\t"				\
	"668: \n\t"							\
	".pushsection .fixup, \"ax\" \n\t"				\
	"700: \n\t"							\
	cleanup_insn "\n\t"						\
	"cmpb	$0, kvm_rebooting\n\t"					\
	"je	667b \n\t"						\
	"jmp	668b \n\t"						\
	".popsection \n\t"						\
	_ASM_EXTABLE(666b, 700b)

#define __kvm_handle_fault_on_reboot(insn)		\
	____kvm_handle_fault_on_reboot(insn, "")
	_ASM_EXTABLE(666b, 667b)

#define KVM_ARCH_WANT_MMU_NOTIFIER
int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end);
+1 −0
Original line number Diff line number Diff line
@@ -52,6 +52,7 @@ enum {
	INTERCEPT_MWAIT,
	INTERCEPT_MWAIT_COND,
	INTERCEPT_XSETBV,
	INTERCEPT_RDPRU,
};


+2 −0
Original line number Diff line number Diff line
@@ -69,6 +69,7 @@
#define SECONDARY_EXEC_PT_USE_GPA		0x01000000
#define SECONDARY_EXEC_MODE_BASED_EPT_EXEC	0x00400000
#define SECONDARY_EXEC_TSC_SCALING              0x02000000
#define SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE	0x04000000

#define PIN_BASED_EXT_INTR_MASK                 0x00000001
#define PIN_BASED_NMI_EXITING                   0x00000008
@@ -110,6 +111,7 @@
#define VMX_MISC_SAVE_EFER_LMA			0x00000020
#define VMX_MISC_ACTIVITY_HLT			0x00000040
#define VMX_MISC_ZERO_LEN_INS			0x40000000
#define VMX_MISC_MSR_LIST_MULTIPLIER		512

/* VMFUNC functions */
#define VMX_VMFUNC_EPTP_SWITCHING               0x00000001
Loading