
Commit e61cf2e3 authored by Linus Torvalds
Pull first set of KVM updates from Paolo Bonzini:
 "PPC:
   - minor code cleanups

  x86:
   - PCID emulation and CR3 caching for shadow page tables
   - nested VMX live migration
   - nested VMCS shadowing
   - optimized IPI hypercall
   - some optimizations

  ARM will come next week"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (85 commits)
  kvm: x86: Set highest physical address bits in non-present/reserved SPTEs
  KVM/x86: Use CC_SET()/CC_OUT in arch/x86/kvm/vmx.c
  KVM: X86: Implement PV IPIs in linux guest
  KVM: X86: Add kvm hypervisor init time platform setup callback
  KVM: X86: Implement "send IPI" hypercall
  KVM/x86: Move X86_CR4_OSXSAVE check into kvm_valid_sregs()
  KVM: x86: Skip pae_root shadow allocation if tdp enabled
  KVM/MMU: Combine flushing remote tlb in mmu_set_spte()
  KVM: vmx: skip VMWRITE of HOST_{FS,GS}_BASE when possible
  KVM: vmx: skip VMWRITE of HOST_{FS,GS}_SEL when possible
  KVM: vmx: always initialize HOST_{FS,GS}_BASE to zero during setup
  KVM: vmx: move struct host_state usage to struct loaded_vmcs
  KVM: vmx: compute need to reload FS/GS/LDT on demand
  KVM: nVMX: remove a misleading comment regarding vmcs02 fields
  KVM: vmx: rename __vmx_load_host_state() and vmx_save_host_state()
  KVM: vmx: add dedicated utility to access guest's kernel_gs_base
  KVM: vmx: track host_state.loaded using a loaded_vmcs pointer
  KVM: vmx: refactor segmentation code in vmx_save_host_state()
  kvm: nVMX: Fix fault priority for VMX operations
  kvm: nVMX: Fix fault vector for VMX operation at CPL > 0
  ...
parents 1009aa12 28a1f3ac
+56 −0
@@ -3561,6 +3561,62 @@ Returns: 0 on success,
	-ENOENT on deassign if the conn_id isn't registered
	-EEXIST on assign if the conn_id is already registered

4.114 KVM_GET_NESTED_STATE

Capability: KVM_CAP_NESTED_STATE
Architectures: x86
Type: vcpu ioctl
Parameters: struct kvm_nested_state (in/out)
Returns: 0 on success, -1 on error
Errors:
  E2BIG:     the total state size (including the fixed-size part of struct
             kvm_nested_state) exceeds the value of 'size' specified by
             the user; the size required will be written into size.

struct kvm_nested_state {
	__u16 flags;
	__u16 format;
	__u32 size;
	union {
		struct kvm_vmx_nested_state vmx;
		struct kvm_svm_nested_state svm;
		__u8 pad[120];
	};
	__u8 data[0];
};

#define KVM_STATE_NESTED_GUEST_MODE	0x00000001
#define KVM_STATE_NESTED_RUN_PENDING	0x00000002

#define KVM_STATE_NESTED_SMM_GUEST_MODE	0x00000001
#define KVM_STATE_NESTED_SMM_VMXON	0x00000002

struct kvm_vmx_nested_state {
	__u64 vmxon_pa;
	__u64 vmcs_pa;

	struct {
		__u16 flags;
	} smm;
};

This ioctl copies the vcpu's nested virtualization state from the kernel to
userspace.

The maximum size of the state, including the fixed-size part of struct
kvm_nested_state, can be retrieved by passing KVM_CAP_NESTED_STATE to
the KVM_CHECK_EXTENSION ioctl().
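To make the size handshake concrete, here is a minimal userspace sketch
(illustrative only, not part of this patch set; the vm_fd and vcpu_fd
descriptors are assumed to come from the usual KVM_CREATE_VM /
KVM_CREATE_VCPU sequence):

#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static struct kvm_nested_state *get_nested_state(int vm_fd, int vcpu_fd)
{
	struct kvm_nested_state *state;
	int size;

	/* Maximum state size, including the fixed-size header. */
	size = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_NESTED_STATE);
	if (size <= 0)
		return NULL;		/* nested state not supported */

	state = calloc(1, size);
	if (!state)
		return NULL;

	/* Tell the kernel how much room there is; the ioctl fails with
	 * E2BIG (and updates state->size) if it is not enough. */
	state->size = size;
	if (ioctl(vcpu_fd, KVM_GET_NESTED_STATE, state) < 0) {
		free(state);
		return NULL;
	}
	return state;
}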

4.115 KVM_SET_NESTED_STATE

Capability: KVM_CAP_NESTED_STATE
Architectures: x86
Type: vcpu ioctl
Parameters: struct kvm_nested_state (in)
Returns: 0 on success, -1 on error

This copies the vcpu's kvm_nested_state struct from userspace to the kernel.  For
the definition of struct kvm_nested_state, see KVM_GET_NESTED_STATE.
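On the destination of a live migration the same buffer can be handed back
verbatim; a minimal sketch, reusing the hypothetical get_nested_state()
helper shown above:

	struct kvm_nested_state *state = get_nested_state(vm_fd, vcpu_fd);

	/* ... transfer the 'state' buffer to the destination ... */

	if (state && ioctl(dst_vcpu_fd, KVM_SET_NESTED_STATE, state) < 0)
		perror("KVM_SET_NESTED_STATE");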


5. The kvm_run structure
------------------------
+4 −0
@@ -62,6 +62,10 @@ KVM_FEATURE_ASYNC_PF_VMEXIT || 10 || paravirtualized async PF VM exit
                                   ||       || can be enabled by setting bit 2
                                   ||       || when writing to msr 0x4b564d02
------------------------------------------------------------------------------
KVM_FEATURE_PV_SEND_IPI            ||    11 || guest checks this feature bit
                                   ||       || before using the paravirtualized
                                   ||       || send-IPI hypercall (see the
                                   ||       || sketch after this table).
------------------------------------------------------------------------------
KVM_FEATURE_CLOCKSOURCE_STABLE_BIT ||    24 || host will warn if no guest-side
                                   ||       || per-cpu warps are expected in
                                   ||       || kvmclock.
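A guest can test the new bit in the KVM feature leaf before wiring up the
PV IPI path.  A minimal freestanding sketch (in-kernel guests would use
the existing kvm_para_has_feature() helper instead):

#include <stdint.h>

#define KVM_CPUID_FEATURES	0x40000001
#define KVM_FEATURE_PV_SEND_IPI	11

/* Read the KVM feature leaf; the feature bits are returned in EAX. */
static inline uint32_t kvm_feature_bits(void)
{
	uint32_t eax, ebx, ecx, edx;

	__asm__ volatile("cpuid"
			 : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
			 : "a"(KVM_CPUID_FEATURES));
	return eax;
}

static inline int pv_send_ipi_supported(void)
{
	return kvm_feature_bits() & (1u << KVM_FEATURE_PV_SEND_IPI);
}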
+20 −0
@@ -121,3 +121,23 @@ compute the CLOCK_REALTIME for its clock, at the same instant.

Returns KVM_EOPNOTSUPP if the host does not use TSC clocksource,
or if the clock type is different from KVM_CLOCK_PAIRING_WALLCLOCK.

6. KVM_HC_SEND_IPI
------------------------
Architecture: x86
Status: active
Purpose: Send IPIs to multiple vCPUs.

a0: lower part of the bitmap of destination APIC IDs
a1: higher part of the bitmap of destination APIC IDs
a2: the lowest APIC ID in bitmap
a3: APIC ICR

The hypercall lets a guest send multicast IPIs, with at most 128
destinations per hypercall in 64-bit mode and 64 vCPUs per
hypercall in 32-bit mode.  The destinations are represented by a
bitmap contained in the first two arguments (a0 and a1). Bit 0 of
a0 corresponds to the APIC ID in the third argument (a2), bit 1
corresponds to the APIC ID a2+1, and so on.

Returns the number of CPUs to which the IPIs were delivered successfully.
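As an illustration of this calling convention, here is a simplified
guest-side sketch.  It is modeled loosely on the guest code added by this
series, not a copy of it; the pv_send_ipi() wrapper and its assumptions
(64-bit mode, all destination IDs within the 128-bit window) are
illustrative.  kvm_hypercall4() and KVM_HC_SEND_IPI are the standard KVM
guest helpers.

#include <linux/kvm_para.h>

/* Send a fixed-vector IPI to the APIC IDs listed in 'apic_ids'. */
static long pv_send_ipi(const unsigned int *apic_ids, int n,
			unsigned int vector)
{
	unsigned long ipi_bitmap_low = 0, ipi_bitmap_high = 0;
	unsigned int min = apic_ids[0];
	int i;

	/* a2 is the lowest APIC ID covered by the bitmap. */
	for (i = 1; i < n; i++)
		if (apic_ids[i] < min)
			min = apic_ids[i];

	/* Bit k of a0 (or bit k-64 of a1) stands for APIC ID min+k. */
	for (i = 0; i < n; i++) {
		unsigned int off = apic_ids[i] - min;

		if (off < 64)
			ipi_bitmap_low |= 1UL << off;
		else if (off < 128)
			ipi_bitmap_high |= 1UL << (off - 64);
		/* IDs outside the window would need a second hypercall. */
	}

	/* a3 carries the APIC ICR value, i.e. the delivery vector. */
	return kvm_hypercall4(KVM_HC_SEND_IPI, ipi_bitmap_low,
			      ipi_bitmap_high, min, vector);
}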
+47 −0
@@ -390,4 +390,51 @@ extern int kvmppc_h_logical_ci_store(struct kvm_vcpu *vcpu);
#define SPLIT_HACK_MASK			0xff000000
#define SPLIT_HACK_OFFS			0xfb000000

/*
 * This packs a VCPU ID from the [0..KVM_MAX_VCPU_ID) space down to the
 * [0..KVM_MAX_VCPUS) space, using knowledge of the guest's core stride
 * (but not its actual threading mode, which is not available) to avoid
 * collisions.
 *
 * The implementation leaves VCPU IDs from the range [0..KVM_MAX_VCPUS) (block
 * 0) unchanged: if the guest is filling each VCORE completely then it will be
 * using consecutive IDs and it will fill the space without any packing.
 *
 * For higher VCPU IDs, the packed ID is based on the VCPU ID modulo
 * KVM_MAX_VCPUS (effectively masking off the top bits) and then an offset is
 * added to avoid collisions.
 *
 * VCPU IDs in the range [KVM_MAX_VCPUS..(KVM_MAX_VCPUS*2)) (block 1) are only
 * possible if the guest is leaving at least 1/2 of each VCORE empty, so IDs
 * can be safely packed into the second half of each VCORE by adding an offset
 * of (stride / 2).
 *
 * Similarly, if VCPU IDs in the range [(KVM_MAX_VCPUS*2)..(KVM_MAX_VCPUS*4))
 * (blocks 2 and 3) are seen, the guest must be leaving at least 3/4 of each
 * VCORE empty so packed IDs can be offset by (stride / 4) and (stride * 3 / 4).
 *
 * Finally, VCPU IDs from blocks 4..7 will only be seen if the guest is using a
 * stride of 8 and 1 thread per core, so the remaining offsets of 1, 5, 3 and 7
 * must be free to use.
 *
 * (The offsets for each block are stored in block_offsets[], indexed by the
 * block number if the stride is 8. For cases where the guest's stride is less
 * than 8, we can re-use the block_offsets array by multiplying the block
 * number by (MAX_SMT_THREADS / stride) to reach the correct entry.)
 */
static inline u32 kvmppc_pack_vcpu_id(struct kvm *kvm, u32 id)
{
	const int block_offsets[MAX_SMT_THREADS] = {0, 4, 2, 6, 1, 5, 3, 7};
	int stride = kvm->arch.emul_smt_mode;
	int block = (id / KVM_MAX_VCPUS) * (MAX_SMT_THREADS / stride);
	u32 packed_id;

	if (WARN_ONCE(block >= MAX_SMT_THREADS, "VCPU ID too large to pack"))
		return 0;
	packed_id = (id % KVM_MAX_VCPUS) + block_offsets[block];
	if (WARN_ONCE(packed_id >= KVM_MAX_VCPUS, "VCPU ID packing failed"))
		return 0;
	return packed_id;
}
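A quick worked example of the packing arithmetic (standalone; the
EX_MAX_VCPUS and EX_SMT_THREADS values are illustrative stand-ins for
the kernel's configuration-dependent KVM_MAX_VCPUS and MAX_SMT_THREADS):

#include <assert.h>

#define EX_MAX_VCPUS	2048		/* stand-in for KVM_MAX_VCPUS */
#define EX_SMT_THREADS	8		/* stand-in for MAX_SMT_THREADS */

static unsigned int ex_pack_vcpu_id(unsigned int id, unsigned int stride)
{
	const int block_offsets[EX_SMT_THREADS] = {0, 4, 2, 6, 1, 5, 3, 7};
	int block = (id / EX_MAX_VCPUS) * (EX_SMT_THREADS / stride);

	return (id % EX_MAX_VCPUS) + block_offsets[block];
}

int main(void)
{
	/* Block 0 is left unchanged. */
	assert(ex_pack_vcpu_id(5, 8) == 5);
	/* Block 1 with stride 8: offset stride/2 = 4. */
	assert(ex_pack_vcpu_id(EX_MAX_VCPUS + 8, 8) == 12);
	/* Block 1 with stride 4 indexes block_offsets[2] = 2. */
	assert(ex_pack_vcpu_id(EX_MAX_VCPUS + 8, 4) == 10);
	return 0;
}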

#endif /* __ASM_KVM_BOOK3S_H__ */
+16 −10
@@ -42,7 +42,14 @@
#define KVM_USER_MEM_SLOTS	512

#include <asm/cputhreads.h>
-#define KVM_MAX_VCPU_ID                (threads_per_subcore * KVM_MAX_VCORES)
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+#include <asm/kvm_book3s_asm.h>		/* for MAX_SMT_THREADS */
+#define KVM_MAX_VCPU_ID		(MAX_SMT_THREADS * KVM_MAX_VCORES)
+
+#else
+#define KVM_MAX_VCPU_ID		KVM_MAX_VCPUS
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */

#define __KVM_HAVE_ARCH_INTC_INITIALIZED

@@ -672,7 +679,7 @@ struct kvm_vcpu_arch {
	gva_t vaddr_accessed;
	pgd_t *pgdir;

-	u8 io_gpr; /* GPR used as IO source/target */
+	u16 io_gpr; /* GPR used as IO source/target */
	u8 mmio_host_swabbed;
	u8 mmio_sign_extend;
	/* conversion between single and double precision */
@@ -688,7 +695,6 @@ struct kvm_vcpu_arch {
	 */
	u8 mmio_vsx_copy_nums;
	u8 mmio_vsx_offset;
-	u8 mmio_vsx_tx_sx_enabled;
	u8 mmio_vmx_copy_nums;
	u8 mmio_vmx_offset;
	u8 mmio_copy_type;
@@ -801,14 +807,14 @@ struct kvm_vcpu_arch {
#define KVMPPC_VCPU_BUSY_IN_HOST	2

/* Values for vcpu->arch.io_gpr */
-#define KVM_MMIO_REG_MASK	0x001f
-#define KVM_MMIO_REG_EXT_MASK	0xffe0
+#define KVM_MMIO_REG_MASK	0x003f
+#define KVM_MMIO_REG_EXT_MASK	0xffc0
#define KVM_MMIO_REG_GPR	0x0000
-#define KVM_MMIO_REG_FPR	0x0020
-#define KVM_MMIO_REG_QPR	0x0040
-#define KVM_MMIO_REG_FQPR	0x0060
-#define KVM_MMIO_REG_VSX	0x0080
-#define KVM_MMIO_REG_VMX	0x00c0
+#define KVM_MMIO_REG_FPR	0x0040
+#define KVM_MMIO_REG_QPR	0x0080
+#define KVM_MMIO_REG_FQPR	0x00c0
+#define KVM_MMIO_REG_VSX	0x0100
+#define KVM_MMIO_REG_VMX	0x0180

#define __KVM_HAVE_ARCH_WQP
#define __KVM_HAVE_CREATE_DEVICE
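The widened masks are easier to see with a decode helper.  A small
illustrative sketch (the decode_io_gpr() helper is made up; the constants
are the new values from this hunk):

#include <stdint.h>
#include <stdio.h>

#define KVM_MMIO_REG_MASK	0x003f	/* low 6 bits: register index */
#define KVM_MMIO_REG_EXT_MASK	0xffc0	/* high bits: register class */
#define KVM_MMIO_REG_GPR	0x0000
#define KVM_MMIO_REG_FPR	0x0040
#define KVM_MMIO_REG_VSX	0x0100

/* Hypothetical decode of a vcpu->arch.io_gpr value: the register class
 * lives in the EXT bits, the index in the low MASK bits.  With a u16
 * io_gpr and a 6-bit index field, 64 registers per class now fit where
 * only 32 did before. */
static void decode_io_gpr(uint16_t io_gpr)
{
	uint16_t class = io_gpr & KVM_MMIO_REG_EXT_MASK;
	uint16_t index = io_gpr & KVM_MMIO_REG_MASK;

	printf("class 0x%04x, index %u\n", class, index);
}

int main(void)
{
	decode_io_gpr(KVM_MMIO_REG_VSX | 35);	/* VSX register 35 */
	return 0;
}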