Loading arch/x86/kvm/vmx.c +186 −254 Original line number Diff line number Diff line Loading @@ -112,6 +112,14 @@ static u64 __read_mostly host_xss; static bool __read_mostly enable_pml = 1; module_param_named(pml, enable_pml, bool, S_IRUGO); #define MSR_TYPE_R 1 #define MSR_TYPE_W 2 #define MSR_TYPE_RW 3 #define MSR_BITMAP_MODE_X2APIC 1 #define MSR_BITMAP_MODE_X2APIC_APICV 2 #define MSR_BITMAP_MODE_LM 4 #define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL /* Guest_tsc -> host_tsc conversion requires 64-bit division. */ Loading Loading @@ -186,7 +194,6 @@ module_param(ple_window_max, int, S_IRUGO); extern const ulong vmx_return; #define NR_AUTOLOAD_MSRS 8 #define VMCS02_POOL_SIZE 1 struct vmcs { u32 revision_id; Loading @@ -211,6 +218,7 @@ struct loaded_vmcs { int soft_vnmi_blocked; ktime_t entry_time; s64 vnmi_blocked_time; unsigned long *msr_bitmap; struct list_head loaded_vmcss_on_cpu_link; }; Loading @@ -227,7 +235,7 @@ struct shared_msr_entry { * stored in guest memory specified by VMPTRLD, but is opaque to the guest, * which must access it using VMREAD/VMWRITE/VMCLEAR instructions. * More than one of these structures may exist, if L1 runs multiple L2 guests. * nested_vmx_run() will use the data here to build a vmcs02: a VMCS for the * nested_vmx_run() will use the data here to build the vmcs02: a VMCS for the * underlying hardware which will be used to run L2. * This structure is packed to ensure that its layout is identical across * machines (necessary for live migration). Loading Loading @@ -410,13 +418,6 @@ struct __packed vmcs12 { */ #define VMCS12_SIZE 0x1000 /* Used to remember the last vmcs02 used for some recently used vmcs12s */ struct vmcs02_list { struct list_head list; gpa_t vmptr; struct loaded_vmcs vmcs02; }; /* * The nested_vmx structure is part of vcpu_vmx, and holds information we need * for correct emulation of VMX (i.e., nested VMX) on this vcpu. Loading @@ -441,15 +442,15 @@ struct nested_vmx { */ bool sync_shadow_vmcs; /* vmcs02_list cache of VMCSs recently used to run L2 guests */ struct list_head vmcs02_pool; int vmcs02_num; bool change_vmcs01_virtual_x2apic_mode; /* L2 must run next, and mustn't decide to exit to L1. */ bool nested_run_pending; struct loaded_vmcs vmcs02; /* * Guest pages referred to in vmcs02 with host-physical pointers, so * we must keep them pinned while L2 runs. * Guest pages referred to in the vmcs02 with host-physical * pointers, so we must keep them pinned while L2 runs. */ struct page *apic_access_page; struct page *virtual_apic_page; Loading @@ -458,8 +459,6 @@ struct nested_vmx { bool pi_pending; u16 posted_intr_nv; unsigned long *msr_bitmap; struct hrtimer preemption_timer; bool preemption_timer_expired; Loading Loading @@ -582,6 +581,7 @@ struct vcpu_vmx { struct kvm_vcpu vcpu; unsigned long host_rsp; u8 fail; u8 msr_bitmap_mode; u32 exit_intr_info; u32 idt_vectoring_info; ulong rflags; Loading Loading @@ -933,6 +933,7 @@ static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu); static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked); static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12, u16 error_code); static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu); static DEFINE_PER_CPU(struct vmcs *, vmxarea); static DEFINE_PER_CPU(struct vmcs *, current_vmcs); Loading @@ -952,12 +953,6 @@ static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock); enum { VMX_IO_BITMAP_A, VMX_IO_BITMAP_B, VMX_MSR_BITMAP_LEGACY, VMX_MSR_BITMAP_LONGMODE, VMX_MSR_BITMAP_LEGACY_X2APIC_APICV, VMX_MSR_BITMAP_LONGMODE_X2APIC_APICV, VMX_MSR_BITMAP_LEGACY_X2APIC, VMX_MSR_BITMAP_LONGMODE_X2APIC, VMX_VMREAD_BITMAP, VMX_VMWRITE_BITMAP, VMX_BITMAP_NR Loading @@ -967,12 +962,6 @@ static unsigned long *vmx_bitmap[VMX_BITMAP_NR]; #define vmx_io_bitmap_a (vmx_bitmap[VMX_IO_BITMAP_A]) #define vmx_io_bitmap_b (vmx_bitmap[VMX_IO_BITMAP_B]) #define vmx_msr_bitmap_legacy (vmx_bitmap[VMX_MSR_BITMAP_LEGACY]) #define vmx_msr_bitmap_longmode (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE]) #define vmx_msr_bitmap_legacy_x2apic_apicv (vmx_bitmap[VMX_MSR_BITMAP_LEGACY_X2APIC_APICV]) #define vmx_msr_bitmap_longmode_x2apic_apicv (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE_X2APIC_APICV]) #define vmx_msr_bitmap_legacy_x2apic (vmx_bitmap[VMX_MSR_BITMAP_LEGACY_X2APIC]) #define vmx_msr_bitmap_longmode_x2apic (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE_X2APIC]) #define vmx_vmread_bitmap (vmx_bitmap[VMX_VMREAD_BITMAP]) #define vmx_vmwrite_bitmap (vmx_bitmap[VMX_VMWRITE_BITMAP]) Loading Loading @@ -2570,36 +2559,6 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to) vmx->guest_msrs[from] = tmp; } static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu) { unsigned long *msr_bitmap; if (is_guest_mode(vcpu)) msr_bitmap = to_vmx(vcpu)->nested.msr_bitmap; else if (cpu_has_secondary_exec_ctrls() && (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) & SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) { if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) { if (is_long_mode(vcpu)) msr_bitmap = vmx_msr_bitmap_longmode_x2apic_apicv; else msr_bitmap = vmx_msr_bitmap_legacy_x2apic_apicv; } else { if (is_long_mode(vcpu)) msr_bitmap = vmx_msr_bitmap_longmode_x2apic; else msr_bitmap = vmx_msr_bitmap_legacy_x2apic; } } else { if (is_long_mode(vcpu)) msr_bitmap = vmx_msr_bitmap_longmode; else msr_bitmap = vmx_msr_bitmap_legacy; } vmcs_write64(MSR_BITMAP, __pa(msr_bitmap)); } /* * Set up the vmcs to automatically save and restore system * msrs. Don't touch the 64-bit msrs if the guest is in legacy Loading Loading @@ -2640,7 +2599,7 @@ static void setup_msrs(struct vcpu_vmx *vmx) vmx->save_nmsrs = save_nmsrs; if (cpu_has_vmx_msr_bitmap()) vmx_set_msr_bitmap(&vmx->vcpu); vmx_update_msr_bitmap(&vmx->vcpu); } /* Loading Loading @@ -3835,11 +3794,6 @@ static struct vmcs *alloc_vmcs_cpu(int cpu) return vmcs; } static struct vmcs *alloc_vmcs(void) { return alloc_vmcs_cpu(raw_smp_processor_id()); } static void free_vmcs(struct vmcs *vmcs) { free_pages((unsigned long)vmcs, vmcs_config.order); Loading @@ -3855,9 +3809,38 @@ static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) loaded_vmcs_clear(loaded_vmcs); free_vmcs(loaded_vmcs->vmcs); loaded_vmcs->vmcs = NULL; if (loaded_vmcs->msr_bitmap) free_page((unsigned long)loaded_vmcs->msr_bitmap); WARN_ON(loaded_vmcs->shadow_vmcs != NULL); } static struct vmcs *alloc_vmcs(void) { return alloc_vmcs_cpu(raw_smp_processor_id()); } static int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) { loaded_vmcs->vmcs = alloc_vmcs(); if (!loaded_vmcs->vmcs) return -ENOMEM; loaded_vmcs->shadow_vmcs = NULL; loaded_vmcs_init(loaded_vmcs); if (cpu_has_vmx_msr_bitmap()) { loaded_vmcs->msr_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); if (!loaded_vmcs->msr_bitmap) goto out_vmcs; memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE); } return 0; out_vmcs: free_loaded_vmcs(loaded_vmcs); return -ENOMEM; } static void free_kvm_area(void) { int cpu; Loading Loading @@ -4916,9 +4899,7 @@ static void free_vpid(int vpid) spin_unlock(&vmx_vpid_lock); } #define MSR_TYPE_R 1 #define MSR_TYPE_W 2 static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr, int type) { int f = sizeof(unsigned long); Loading Loading @@ -4953,6 +4934,50 @@ static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, } } static void __always_inline vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr, int type) { int f = sizeof(unsigned long); if (!cpu_has_vmx_msr_bitmap()) return; /* * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals * have the write-low and read-high bitmap offsets the wrong way round. * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff. */ if (msr <= 0x1fff) { if (type & MSR_TYPE_R) /* read-low */ __set_bit(msr, msr_bitmap + 0x000 / f); if (type & MSR_TYPE_W) /* write-low */ __set_bit(msr, msr_bitmap + 0x800 / f); } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { msr &= 0x1fff; if (type & MSR_TYPE_R) /* read-high */ __set_bit(msr, msr_bitmap + 0x400 / f); if (type & MSR_TYPE_W) /* write-high */ __set_bit(msr, msr_bitmap + 0xc00 / f); } } static void __always_inline vmx_set_intercept_for_msr(unsigned long *msr_bitmap, u32 msr, int type, bool value) { if (value) vmx_enable_intercept_for_msr(msr_bitmap, msr, type); else vmx_disable_intercept_for_msr(msr_bitmap, msr, type); } /* * If a msr is allowed by L0, we should check whether it is allowed by L1. * The corresponding bit will be cleared unless both of L0 and L1 allow it. Loading Loading @@ -4999,28 +5024,68 @@ static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1, } } static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only) static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu) { if (!longmode_only) __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, msr, MSR_TYPE_R | MSR_TYPE_W); __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, msr, MSR_TYPE_R | MSR_TYPE_W); u8 mode = 0; if (cpu_has_secondary_exec_ctrls() && (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) & SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) { mode |= MSR_BITMAP_MODE_X2APIC; if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) mode |= MSR_BITMAP_MODE_X2APIC_APICV; } static void vmx_disable_intercept_msr_x2apic(u32 msr, int type, bool apicv_active) if (is_long_mode(vcpu)) mode |= MSR_BITMAP_MODE_LM; return mode; } #define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4)) static void vmx_update_msr_bitmap_x2apic(unsigned long *msr_bitmap, u8 mode) { if (apicv_active) { __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv, msr, type); __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv, msr, type); } else { __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic, msr, type); __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic, msr, type); int msr; for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) { unsigned word = msr / BITS_PER_LONG; msr_bitmap[word] = (mode & MSR_BITMAP_MODE_X2APIC_APICV) ? 0 : ~0; msr_bitmap[word + (0x800 / sizeof(long))] = ~0; } if (mode & MSR_BITMAP_MODE_X2APIC) { /* * TPR reads and writes can be virtualized even if virtual interrupt * delivery is not in use. */ vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_RW); if (mode & MSR_BITMAP_MODE_X2APIC_APICV) { vmx_enable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_R); vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_EOI), MSR_TYPE_W); vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W); } } } static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap; u8 mode = vmx_msr_bitmap_mode(vcpu); u8 changed = mode ^ vmx->msr_bitmap_mode; if (!changed) return; vmx_set_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW, !(mode & MSR_BITMAP_MODE_LM)); if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV)) vmx_update_msr_bitmap_x2apic(msr_bitmap, mode); vmx->msr_bitmap_mode = mode; } static bool vmx_get_enable_apicv(struct kvm_vcpu *vcpu) Loading Loading @@ -5272,7 +5337,7 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) } if (cpu_has_vmx_msr_bitmap()) vmx_set_msr_bitmap(vcpu); vmx_update_msr_bitmap(vcpu); } static u32 vmx_exec_control(struct vcpu_vmx *vmx) Loading Loading @@ -5459,7 +5524,7 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap)); } if (cpu_has_vmx_msr_bitmap()) vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_legacy)); vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap)); vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ Loading Loading @@ -6742,7 +6807,7 @@ void vmx_enable_tdp(void) static __init int hardware_setup(void) { int r = -ENOMEM, i, msr; int r = -ENOMEM, i; rdmsrl_safe(MSR_EFER, &host_efer); Loading @@ -6762,9 +6827,6 @@ static __init int hardware_setup(void) memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE); memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE); memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE); if (setup_vmcs_config(&vmcs_config) < 0) { r = -EIO; goto out; Loading Loading @@ -6833,42 +6895,8 @@ static __init int hardware_setup(void) kvm_tsc_scaling_ratio_frac_bits = 48; } vmx_disable_intercept_for_msr(MSR_FS_BASE, false); vmx_disable_intercept_for_msr(MSR_GS_BASE, false); vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true); vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false); vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false); vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false); memcpy(vmx_msr_bitmap_legacy_x2apic_apicv, vmx_msr_bitmap_legacy, PAGE_SIZE); memcpy(vmx_msr_bitmap_longmode_x2apic_apicv, vmx_msr_bitmap_longmode, PAGE_SIZE); memcpy(vmx_msr_bitmap_legacy_x2apic, vmx_msr_bitmap_legacy, PAGE_SIZE); memcpy(vmx_msr_bitmap_longmode_x2apic, vmx_msr_bitmap_longmode, PAGE_SIZE); set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ for (msr = 0x800; msr <= 0x8ff; msr++) { if (msr == 0x839 /* TMCCT */) continue; vmx_disable_intercept_msr_x2apic(msr, MSR_TYPE_R, true); } /* * TPR reads and writes can be virtualized even if virtual interrupt * delivery is not in use. */ vmx_disable_intercept_msr_x2apic(0x808, MSR_TYPE_W, true); vmx_disable_intercept_msr_x2apic(0x808, MSR_TYPE_R | MSR_TYPE_W, false); /* EOI */ vmx_disable_intercept_msr_x2apic(0x80b, MSR_TYPE_W, true); /* SELF-IPI */ vmx_disable_intercept_msr_x2apic(0x83f, MSR_TYPE_W, true); if (enable_ept) vmx_enable_tdp(); else Loading Loading @@ -6971,94 +6999,6 @@ static int handle_monitor(struct kvm_vcpu *vcpu) return handle_nop(vcpu); } /* * To run an L2 guest, we need a vmcs02 based on the L1-specified vmcs12. * We could reuse a single VMCS for all the L2 guests, but we also want the * option to allocate a separate vmcs02 for each separate loaded vmcs12 - this * allows keeping them loaded on the processor, and in the future will allow * optimizations where prepare_vmcs02 doesn't need to set all the fields on * every entry if they never change. * So we keep, in vmx->nested.vmcs02_pool, a cache of size VMCS02_POOL_SIZE * (>=0) with a vmcs02 for each recently loaded vmcs12s, most recent first. * * The following functions allocate and free a vmcs02 in this pool. */ /* Get a VMCS from the pool to use as vmcs02 for the current vmcs12. */ static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx) { struct vmcs02_list *item; list_for_each_entry(item, &vmx->nested.vmcs02_pool, list) if (item->vmptr == vmx->nested.current_vmptr) { list_move(&item->list, &vmx->nested.vmcs02_pool); return &item->vmcs02; } if (vmx->nested.vmcs02_num >= max(VMCS02_POOL_SIZE, 1)) { /* Recycle the least recently used VMCS. */ item = list_last_entry(&vmx->nested.vmcs02_pool, struct vmcs02_list, list); item->vmptr = vmx->nested.current_vmptr; list_move(&item->list, &vmx->nested.vmcs02_pool); return &item->vmcs02; } /* Create a new VMCS */ item = kzalloc(sizeof(struct vmcs02_list), GFP_KERNEL); if (!item) return NULL; item->vmcs02.vmcs = alloc_vmcs(); item->vmcs02.shadow_vmcs = NULL; if (!item->vmcs02.vmcs) { kfree(item); return NULL; } loaded_vmcs_init(&item->vmcs02); item->vmptr = vmx->nested.current_vmptr; list_add(&(item->list), &(vmx->nested.vmcs02_pool)); vmx->nested.vmcs02_num++; return &item->vmcs02; } /* Free and remove from pool a vmcs02 saved for a vmcs12 (if there is one) */ static void nested_free_vmcs02(struct vcpu_vmx *vmx, gpa_t vmptr) { struct vmcs02_list *item; list_for_each_entry(item, &vmx->nested.vmcs02_pool, list) if (item->vmptr == vmptr) { free_loaded_vmcs(&item->vmcs02); list_del(&item->list); kfree(item); vmx->nested.vmcs02_num--; return; } } /* * Free all VMCSs saved for this vcpu, except the one pointed by * vmx->loaded_vmcs. We must be running L1, so vmx->loaded_vmcs * must be &vmx->vmcs01. */ static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx) { struct vmcs02_list *item, *n; WARN_ON(vmx->loaded_vmcs != &vmx->vmcs01); list_for_each_entry_safe(item, n, &vmx->nested.vmcs02_pool, list) { /* * Something will leak if the above WARN triggers. Better than * a use-after-free. */ if (vmx->loaded_vmcs == &item->vmcs02) continue; free_loaded_vmcs(&item->vmcs02); list_del(&item->list); kfree(item); vmx->nested.vmcs02_num--; } } /* * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(), * set the success or error code of an emulated VMX instruction, as specified Loading Loading @@ -7239,13 +7179,11 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); struct vmcs *shadow_vmcs; int r; if (cpu_has_vmx_msr_bitmap()) { vmx->nested.msr_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); if (!vmx->nested.msr_bitmap) goto out_msr_bitmap; } r = alloc_loaded_vmcs(&vmx->nested.vmcs02); if (r < 0) goto out_vmcs02; vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL); if (!vmx->nested.cached_vmcs12) Loading @@ -7262,9 +7200,6 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu) vmx->vmcs01.shadow_vmcs = shadow_vmcs; } INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool)); vmx->nested.vmcs02_num = 0; hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); vmx->nested.preemption_timer.function = vmx_preemption_timer_fn; Loading @@ -7276,9 +7211,9 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu) kfree(vmx->nested.cached_vmcs12); out_cached_vmcs12: free_page((unsigned long)vmx->nested.msr_bitmap); free_loaded_vmcs(&vmx->nested.vmcs02); out_msr_bitmap: out_vmcs02: return -ENOMEM; } Loading Loading @@ -7421,10 +7356,6 @@ static void free_nested(struct vcpu_vmx *vmx) free_vpid(vmx->nested.vpid02); vmx->nested.posted_intr_nv = -1; vmx->nested.current_vmptr = -1ull; if (vmx->nested.msr_bitmap) { free_page((unsigned long)vmx->nested.msr_bitmap); vmx->nested.msr_bitmap = NULL; } if (enable_shadow_vmcs) { vmx_disable_shadow_vmcs(vmx); vmcs_clear(vmx->vmcs01.shadow_vmcs); Loading @@ -7432,7 +7363,7 @@ static void free_nested(struct vcpu_vmx *vmx) vmx->vmcs01.shadow_vmcs = NULL; } kfree(vmx->nested.cached_vmcs12); /* Unpin physical memory we referred to in current vmcs02 */ /* Unpin physical memory we referred to in the vmcs02 */ if (vmx->nested.apic_access_page) { kvm_release_page_dirty(vmx->nested.apic_access_page); vmx->nested.apic_access_page = NULL; Loading @@ -7448,7 +7379,7 @@ static void free_nested(struct vcpu_vmx *vmx) vmx->nested.pi_desc = NULL; } nested_free_all_saved_vmcss(vmx); free_loaded_vmcs(&vmx->nested.vmcs02); } /* Emulate the VMXOFF instruction */ Loading Loading @@ -7491,8 +7422,6 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) vmptr + offsetof(struct vmcs12, launch_state), &zero, sizeof(zero)); nested_free_vmcs02(vmx, vmptr); nested_vmx_succeed(vcpu); return kvm_skip_emulated_instruction(vcpu); } Loading Loading @@ -8404,10 +8333,11 @@ static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason) /* * The host physical addresses of some pages of guest memory * are loaded into VMCS02 (e.g. L1's Virtual APIC Page). The CPU * may write to these pages via their host physical address while * L2 is running, bypassing any address-translation-based dirty * tracking (e.g. EPT write protection). * are loaded into the vmcs02 (e.g. vmcs12's Virtual APIC * Page). The CPU may write to these pages via their host * physical address while L2 is running, bypassing any * address-translation-based dirty tracking (e.g. EPT write * protection). * * Mark them dirty on every exit from L2 to prevent them from * getting out of sync with dirty tracking. Loading Loading @@ -8941,7 +8871,7 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) } vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control); vmx_set_msr_bitmap(vcpu); vmx_update_msr_bitmap(vcpu); } static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa) Loading Loading @@ -9602,6 +9532,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) { int err; struct vcpu_vmx *vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); unsigned long *msr_bitmap; int cpu; if (!vmx) Loading Loading @@ -9634,13 +9565,20 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) if (!vmx->guest_msrs) goto free_pml; vmx->loaded_vmcs = &vmx->vmcs01; vmx->loaded_vmcs->vmcs = alloc_vmcs(); vmx->loaded_vmcs->shadow_vmcs = NULL; if (!vmx->loaded_vmcs->vmcs) err = alloc_loaded_vmcs(&vmx->vmcs01); if (err < 0) goto free_msrs; loaded_vmcs_init(vmx->loaded_vmcs); msr_bitmap = vmx->vmcs01.msr_bitmap; vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW); vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW); vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW); vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW); vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW); vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW); vmx->msr_bitmap_mode = 0; vmx->loaded_vmcs = &vmx->vmcs01; cpu = get_cpu(); vmx_vcpu_load(&vmx->vcpu, cpu); vmx->vcpu.cpu = cpu; Loading Loading @@ -10103,7 +10041,7 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, int msr; struct page *page; unsigned long *msr_bitmap_l1; unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.msr_bitmap; unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap; /* This shortcut is ok because we support only x2APIC MSRs so far. */ if (!nested_cpu_has_virt_x2apic_mode(vmcs12)) Loading Loading @@ -10680,6 +10618,9 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, if (kvm_has_tsc_control) decache_tsc_multiplier(vmx); if (cpu_has_vmx_msr_bitmap()) vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap)); if (enable_vpid) { /* * There is no direct mapping between vpid02 and vpid12, the Loading Loading @@ -10901,20 +10842,15 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry) { struct vcpu_vmx *vmx = to_vmx(vcpu); struct vmcs12 *vmcs12 = get_vmcs12(vcpu); struct loaded_vmcs *vmcs02; u32 msr_entry_idx; u32 exit_qual; vmcs02 = nested_get_current_vmcs02(vmx); if (!vmcs02) return -ENOMEM; enter_guest_mode(vcpu); if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); vmx_switch_vmcs(vcpu, vmcs02); vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02); vmx_segment_cache_clear(vmx); if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &exit_qual)) { Loading Loading @@ -11483,7 +11419,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, vmcs_write64(GUEST_IA32_DEBUGCTL, 0); if (cpu_has_vmx_msr_bitmap()) vmx_set_msr_bitmap(vcpu); vmx_update_msr_bitmap(vcpu); if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr, vmcs12->vm_exit_msr_load_count)) Loading Loading @@ -11532,10 +11468,6 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, vm_exit_controls_reset_shadow(vmx); vmx_segment_cache_clear(vmx); /* if no vmcs02 cache requested, remove the one we used */ if (VMCS02_POOL_SIZE == 0) nested_free_vmcs02(vmx, vmx->nested.current_vmptr); /* Update any VMCS fields that might have changed while L2 ran */ vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr); vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr); Loading Loading
arch/x86/kvm/vmx.c +186 −254 Original line number Diff line number Diff line Loading @@ -112,6 +112,14 @@ static u64 __read_mostly host_xss; static bool __read_mostly enable_pml = 1; module_param_named(pml, enable_pml, bool, S_IRUGO); #define MSR_TYPE_R 1 #define MSR_TYPE_W 2 #define MSR_TYPE_RW 3 #define MSR_BITMAP_MODE_X2APIC 1 #define MSR_BITMAP_MODE_X2APIC_APICV 2 #define MSR_BITMAP_MODE_LM 4 #define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL /* Guest_tsc -> host_tsc conversion requires 64-bit division. */ Loading Loading @@ -186,7 +194,6 @@ module_param(ple_window_max, int, S_IRUGO); extern const ulong vmx_return; #define NR_AUTOLOAD_MSRS 8 #define VMCS02_POOL_SIZE 1 struct vmcs { u32 revision_id; Loading @@ -211,6 +218,7 @@ struct loaded_vmcs { int soft_vnmi_blocked; ktime_t entry_time; s64 vnmi_blocked_time; unsigned long *msr_bitmap; struct list_head loaded_vmcss_on_cpu_link; }; Loading @@ -227,7 +235,7 @@ struct shared_msr_entry { * stored in guest memory specified by VMPTRLD, but is opaque to the guest, * which must access it using VMREAD/VMWRITE/VMCLEAR instructions. * More than one of these structures may exist, if L1 runs multiple L2 guests. * nested_vmx_run() will use the data here to build a vmcs02: a VMCS for the * nested_vmx_run() will use the data here to build the vmcs02: a VMCS for the * underlying hardware which will be used to run L2. * This structure is packed to ensure that its layout is identical across * machines (necessary for live migration). Loading Loading @@ -410,13 +418,6 @@ struct __packed vmcs12 { */ #define VMCS12_SIZE 0x1000 /* Used to remember the last vmcs02 used for some recently used vmcs12s */ struct vmcs02_list { struct list_head list; gpa_t vmptr; struct loaded_vmcs vmcs02; }; /* * The nested_vmx structure is part of vcpu_vmx, and holds information we need * for correct emulation of VMX (i.e., nested VMX) on this vcpu. Loading @@ -441,15 +442,15 @@ struct nested_vmx { */ bool sync_shadow_vmcs; /* vmcs02_list cache of VMCSs recently used to run L2 guests */ struct list_head vmcs02_pool; int vmcs02_num; bool change_vmcs01_virtual_x2apic_mode; /* L2 must run next, and mustn't decide to exit to L1. */ bool nested_run_pending; struct loaded_vmcs vmcs02; /* * Guest pages referred to in vmcs02 with host-physical pointers, so * we must keep them pinned while L2 runs. * Guest pages referred to in the vmcs02 with host-physical * pointers, so we must keep them pinned while L2 runs. */ struct page *apic_access_page; struct page *virtual_apic_page; Loading @@ -458,8 +459,6 @@ struct nested_vmx { bool pi_pending; u16 posted_intr_nv; unsigned long *msr_bitmap; struct hrtimer preemption_timer; bool preemption_timer_expired; Loading Loading @@ -582,6 +581,7 @@ struct vcpu_vmx { struct kvm_vcpu vcpu; unsigned long host_rsp; u8 fail; u8 msr_bitmap_mode; u32 exit_intr_info; u32 idt_vectoring_info; ulong rflags; Loading Loading @@ -933,6 +933,7 @@ static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu); static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked); static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12, u16 error_code); static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu); static DEFINE_PER_CPU(struct vmcs *, vmxarea); static DEFINE_PER_CPU(struct vmcs *, current_vmcs); Loading @@ -952,12 +953,6 @@ static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock); enum { VMX_IO_BITMAP_A, VMX_IO_BITMAP_B, VMX_MSR_BITMAP_LEGACY, VMX_MSR_BITMAP_LONGMODE, VMX_MSR_BITMAP_LEGACY_X2APIC_APICV, VMX_MSR_BITMAP_LONGMODE_X2APIC_APICV, VMX_MSR_BITMAP_LEGACY_X2APIC, VMX_MSR_BITMAP_LONGMODE_X2APIC, VMX_VMREAD_BITMAP, VMX_VMWRITE_BITMAP, VMX_BITMAP_NR Loading @@ -967,12 +962,6 @@ static unsigned long *vmx_bitmap[VMX_BITMAP_NR]; #define vmx_io_bitmap_a (vmx_bitmap[VMX_IO_BITMAP_A]) #define vmx_io_bitmap_b (vmx_bitmap[VMX_IO_BITMAP_B]) #define vmx_msr_bitmap_legacy (vmx_bitmap[VMX_MSR_BITMAP_LEGACY]) #define vmx_msr_bitmap_longmode (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE]) #define vmx_msr_bitmap_legacy_x2apic_apicv (vmx_bitmap[VMX_MSR_BITMAP_LEGACY_X2APIC_APICV]) #define vmx_msr_bitmap_longmode_x2apic_apicv (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE_X2APIC_APICV]) #define vmx_msr_bitmap_legacy_x2apic (vmx_bitmap[VMX_MSR_BITMAP_LEGACY_X2APIC]) #define vmx_msr_bitmap_longmode_x2apic (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE_X2APIC]) #define vmx_vmread_bitmap (vmx_bitmap[VMX_VMREAD_BITMAP]) #define vmx_vmwrite_bitmap (vmx_bitmap[VMX_VMWRITE_BITMAP]) Loading Loading @@ -2570,36 +2559,6 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to) vmx->guest_msrs[from] = tmp; } static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu) { unsigned long *msr_bitmap; if (is_guest_mode(vcpu)) msr_bitmap = to_vmx(vcpu)->nested.msr_bitmap; else if (cpu_has_secondary_exec_ctrls() && (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) & SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) { if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) { if (is_long_mode(vcpu)) msr_bitmap = vmx_msr_bitmap_longmode_x2apic_apicv; else msr_bitmap = vmx_msr_bitmap_legacy_x2apic_apicv; } else { if (is_long_mode(vcpu)) msr_bitmap = vmx_msr_bitmap_longmode_x2apic; else msr_bitmap = vmx_msr_bitmap_legacy_x2apic; } } else { if (is_long_mode(vcpu)) msr_bitmap = vmx_msr_bitmap_longmode; else msr_bitmap = vmx_msr_bitmap_legacy; } vmcs_write64(MSR_BITMAP, __pa(msr_bitmap)); } /* * Set up the vmcs to automatically save and restore system * msrs. Don't touch the 64-bit msrs if the guest is in legacy Loading Loading @@ -2640,7 +2599,7 @@ static void setup_msrs(struct vcpu_vmx *vmx) vmx->save_nmsrs = save_nmsrs; if (cpu_has_vmx_msr_bitmap()) vmx_set_msr_bitmap(&vmx->vcpu); vmx_update_msr_bitmap(&vmx->vcpu); } /* Loading Loading @@ -3835,11 +3794,6 @@ static struct vmcs *alloc_vmcs_cpu(int cpu) return vmcs; } static struct vmcs *alloc_vmcs(void) { return alloc_vmcs_cpu(raw_smp_processor_id()); } static void free_vmcs(struct vmcs *vmcs) { free_pages((unsigned long)vmcs, vmcs_config.order); Loading @@ -3855,9 +3809,38 @@ static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) loaded_vmcs_clear(loaded_vmcs); free_vmcs(loaded_vmcs->vmcs); loaded_vmcs->vmcs = NULL; if (loaded_vmcs->msr_bitmap) free_page((unsigned long)loaded_vmcs->msr_bitmap); WARN_ON(loaded_vmcs->shadow_vmcs != NULL); } static struct vmcs *alloc_vmcs(void) { return alloc_vmcs_cpu(raw_smp_processor_id()); } static int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) { loaded_vmcs->vmcs = alloc_vmcs(); if (!loaded_vmcs->vmcs) return -ENOMEM; loaded_vmcs->shadow_vmcs = NULL; loaded_vmcs_init(loaded_vmcs); if (cpu_has_vmx_msr_bitmap()) { loaded_vmcs->msr_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); if (!loaded_vmcs->msr_bitmap) goto out_vmcs; memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE); } return 0; out_vmcs: free_loaded_vmcs(loaded_vmcs); return -ENOMEM; } static void free_kvm_area(void) { int cpu; Loading Loading @@ -4916,9 +4899,7 @@ static void free_vpid(int vpid) spin_unlock(&vmx_vpid_lock); } #define MSR_TYPE_R 1 #define MSR_TYPE_W 2 static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr, int type) { int f = sizeof(unsigned long); Loading Loading @@ -4953,6 +4934,50 @@ static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, } } static void __always_inline vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr, int type) { int f = sizeof(unsigned long); if (!cpu_has_vmx_msr_bitmap()) return; /* * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals * have the write-low and read-high bitmap offsets the wrong way round. * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff. */ if (msr <= 0x1fff) { if (type & MSR_TYPE_R) /* read-low */ __set_bit(msr, msr_bitmap + 0x000 / f); if (type & MSR_TYPE_W) /* write-low */ __set_bit(msr, msr_bitmap + 0x800 / f); } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { msr &= 0x1fff; if (type & MSR_TYPE_R) /* read-high */ __set_bit(msr, msr_bitmap + 0x400 / f); if (type & MSR_TYPE_W) /* write-high */ __set_bit(msr, msr_bitmap + 0xc00 / f); } } static void __always_inline vmx_set_intercept_for_msr(unsigned long *msr_bitmap, u32 msr, int type, bool value) { if (value) vmx_enable_intercept_for_msr(msr_bitmap, msr, type); else vmx_disable_intercept_for_msr(msr_bitmap, msr, type); } /* * If a msr is allowed by L0, we should check whether it is allowed by L1. * The corresponding bit will be cleared unless both of L0 and L1 allow it. Loading Loading @@ -4999,28 +5024,68 @@ static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1, } } static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only) static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu) { if (!longmode_only) __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, msr, MSR_TYPE_R | MSR_TYPE_W); __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, msr, MSR_TYPE_R | MSR_TYPE_W); u8 mode = 0; if (cpu_has_secondary_exec_ctrls() && (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) & SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) { mode |= MSR_BITMAP_MODE_X2APIC; if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) mode |= MSR_BITMAP_MODE_X2APIC_APICV; } static void vmx_disable_intercept_msr_x2apic(u32 msr, int type, bool apicv_active) if (is_long_mode(vcpu)) mode |= MSR_BITMAP_MODE_LM; return mode; } #define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4)) static void vmx_update_msr_bitmap_x2apic(unsigned long *msr_bitmap, u8 mode) { if (apicv_active) { __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv, msr, type); __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv, msr, type); } else { __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic, msr, type); __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic, msr, type); int msr; for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) { unsigned word = msr / BITS_PER_LONG; msr_bitmap[word] = (mode & MSR_BITMAP_MODE_X2APIC_APICV) ? 0 : ~0; msr_bitmap[word + (0x800 / sizeof(long))] = ~0; } if (mode & MSR_BITMAP_MODE_X2APIC) { /* * TPR reads and writes can be virtualized even if virtual interrupt * delivery is not in use. */ vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_RW); if (mode & MSR_BITMAP_MODE_X2APIC_APICV) { vmx_enable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_R); vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_EOI), MSR_TYPE_W); vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W); } } } static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap; u8 mode = vmx_msr_bitmap_mode(vcpu); u8 changed = mode ^ vmx->msr_bitmap_mode; if (!changed) return; vmx_set_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW, !(mode & MSR_BITMAP_MODE_LM)); if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV)) vmx_update_msr_bitmap_x2apic(msr_bitmap, mode); vmx->msr_bitmap_mode = mode; } static bool vmx_get_enable_apicv(struct kvm_vcpu *vcpu) Loading Loading @@ -5272,7 +5337,7 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) } if (cpu_has_vmx_msr_bitmap()) vmx_set_msr_bitmap(vcpu); vmx_update_msr_bitmap(vcpu); } static u32 vmx_exec_control(struct vcpu_vmx *vmx) Loading Loading @@ -5459,7 +5524,7 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap)); } if (cpu_has_vmx_msr_bitmap()) vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_legacy)); vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap)); vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ Loading Loading @@ -6742,7 +6807,7 @@ void vmx_enable_tdp(void) static __init int hardware_setup(void) { int r = -ENOMEM, i, msr; int r = -ENOMEM, i; rdmsrl_safe(MSR_EFER, &host_efer); Loading @@ -6762,9 +6827,6 @@ static __init int hardware_setup(void) memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE); memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE); memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE); if (setup_vmcs_config(&vmcs_config) < 0) { r = -EIO; goto out; Loading Loading @@ -6833,42 +6895,8 @@ static __init int hardware_setup(void) kvm_tsc_scaling_ratio_frac_bits = 48; } vmx_disable_intercept_for_msr(MSR_FS_BASE, false); vmx_disable_intercept_for_msr(MSR_GS_BASE, false); vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true); vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false); vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false); vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false); memcpy(vmx_msr_bitmap_legacy_x2apic_apicv, vmx_msr_bitmap_legacy, PAGE_SIZE); memcpy(vmx_msr_bitmap_longmode_x2apic_apicv, vmx_msr_bitmap_longmode, PAGE_SIZE); memcpy(vmx_msr_bitmap_legacy_x2apic, vmx_msr_bitmap_legacy, PAGE_SIZE); memcpy(vmx_msr_bitmap_longmode_x2apic, vmx_msr_bitmap_longmode, PAGE_SIZE); set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ for (msr = 0x800; msr <= 0x8ff; msr++) { if (msr == 0x839 /* TMCCT */) continue; vmx_disable_intercept_msr_x2apic(msr, MSR_TYPE_R, true); } /* * TPR reads and writes can be virtualized even if virtual interrupt * delivery is not in use. */ vmx_disable_intercept_msr_x2apic(0x808, MSR_TYPE_W, true); vmx_disable_intercept_msr_x2apic(0x808, MSR_TYPE_R | MSR_TYPE_W, false); /* EOI */ vmx_disable_intercept_msr_x2apic(0x80b, MSR_TYPE_W, true); /* SELF-IPI */ vmx_disable_intercept_msr_x2apic(0x83f, MSR_TYPE_W, true); if (enable_ept) vmx_enable_tdp(); else Loading Loading @@ -6971,94 +6999,6 @@ static int handle_monitor(struct kvm_vcpu *vcpu) return handle_nop(vcpu); } /* * To run an L2 guest, we need a vmcs02 based on the L1-specified vmcs12. * We could reuse a single VMCS for all the L2 guests, but we also want the * option to allocate a separate vmcs02 for each separate loaded vmcs12 - this * allows keeping them loaded on the processor, and in the future will allow * optimizations where prepare_vmcs02 doesn't need to set all the fields on * every entry if they never change. * So we keep, in vmx->nested.vmcs02_pool, a cache of size VMCS02_POOL_SIZE * (>=0) with a vmcs02 for each recently loaded vmcs12s, most recent first. * * The following functions allocate and free a vmcs02 in this pool. */ /* Get a VMCS from the pool to use as vmcs02 for the current vmcs12. */ static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx) { struct vmcs02_list *item; list_for_each_entry(item, &vmx->nested.vmcs02_pool, list) if (item->vmptr == vmx->nested.current_vmptr) { list_move(&item->list, &vmx->nested.vmcs02_pool); return &item->vmcs02; } if (vmx->nested.vmcs02_num >= max(VMCS02_POOL_SIZE, 1)) { /* Recycle the least recently used VMCS. */ item = list_last_entry(&vmx->nested.vmcs02_pool, struct vmcs02_list, list); item->vmptr = vmx->nested.current_vmptr; list_move(&item->list, &vmx->nested.vmcs02_pool); return &item->vmcs02; } /* Create a new VMCS */ item = kzalloc(sizeof(struct vmcs02_list), GFP_KERNEL); if (!item) return NULL; item->vmcs02.vmcs = alloc_vmcs(); item->vmcs02.shadow_vmcs = NULL; if (!item->vmcs02.vmcs) { kfree(item); return NULL; } loaded_vmcs_init(&item->vmcs02); item->vmptr = vmx->nested.current_vmptr; list_add(&(item->list), &(vmx->nested.vmcs02_pool)); vmx->nested.vmcs02_num++; return &item->vmcs02; } /* Free and remove from pool a vmcs02 saved for a vmcs12 (if there is one) */ static void nested_free_vmcs02(struct vcpu_vmx *vmx, gpa_t vmptr) { struct vmcs02_list *item; list_for_each_entry(item, &vmx->nested.vmcs02_pool, list) if (item->vmptr == vmptr) { free_loaded_vmcs(&item->vmcs02); list_del(&item->list); kfree(item); vmx->nested.vmcs02_num--; return; } } /* * Free all VMCSs saved for this vcpu, except the one pointed by * vmx->loaded_vmcs. We must be running L1, so vmx->loaded_vmcs * must be &vmx->vmcs01. */ static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx) { struct vmcs02_list *item, *n; WARN_ON(vmx->loaded_vmcs != &vmx->vmcs01); list_for_each_entry_safe(item, n, &vmx->nested.vmcs02_pool, list) { /* * Something will leak if the above WARN triggers. Better than * a use-after-free. */ if (vmx->loaded_vmcs == &item->vmcs02) continue; free_loaded_vmcs(&item->vmcs02); list_del(&item->list); kfree(item); vmx->nested.vmcs02_num--; } } /* * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(), * set the success or error code of an emulated VMX instruction, as specified Loading Loading @@ -7239,13 +7179,11 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); struct vmcs *shadow_vmcs; int r; if (cpu_has_vmx_msr_bitmap()) { vmx->nested.msr_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); if (!vmx->nested.msr_bitmap) goto out_msr_bitmap; } r = alloc_loaded_vmcs(&vmx->nested.vmcs02); if (r < 0) goto out_vmcs02; vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL); if (!vmx->nested.cached_vmcs12) Loading @@ -7262,9 +7200,6 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu) vmx->vmcs01.shadow_vmcs = shadow_vmcs; } INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool)); vmx->nested.vmcs02_num = 0; hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); vmx->nested.preemption_timer.function = vmx_preemption_timer_fn; Loading @@ -7276,9 +7211,9 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu) kfree(vmx->nested.cached_vmcs12); out_cached_vmcs12: free_page((unsigned long)vmx->nested.msr_bitmap); free_loaded_vmcs(&vmx->nested.vmcs02); out_msr_bitmap: out_vmcs02: return -ENOMEM; } Loading Loading @@ -7421,10 +7356,6 @@ static void free_nested(struct vcpu_vmx *vmx) free_vpid(vmx->nested.vpid02); vmx->nested.posted_intr_nv = -1; vmx->nested.current_vmptr = -1ull; if (vmx->nested.msr_bitmap) { free_page((unsigned long)vmx->nested.msr_bitmap); vmx->nested.msr_bitmap = NULL; } if (enable_shadow_vmcs) { vmx_disable_shadow_vmcs(vmx); vmcs_clear(vmx->vmcs01.shadow_vmcs); Loading @@ -7432,7 +7363,7 @@ static void free_nested(struct vcpu_vmx *vmx) vmx->vmcs01.shadow_vmcs = NULL; } kfree(vmx->nested.cached_vmcs12); /* Unpin physical memory we referred to in current vmcs02 */ /* Unpin physical memory we referred to in the vmcs02 */ if (vmx->nested.apic_access_page) { kvm_release_page_dirty(vmx->nested.apic_access_page); vmx->nested.apic_access_page = NULL; Loading @@ -7448,7 +7379,7 @@ static void free_nested(struct vcpu_vmx *vmx) vmx->nested.pi_desc = NULL; } nested_free_all_saved_vmcss(vmx); free_loaded_vmcs(&vmx->nested.vmcs02); } /* Emulate the VMXOFF instruction */ Loading Loading @@ -7491,8 +7422,6 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) vmptr + offsetof(struct vmcs12, launch_state), &zero, sizeof(zero)); nested_free_vmcs02(vmx, vmptr); nested_vmx_succeed(vcpu); return kvm_skip_emulated_instruction(vcpu); } Loading Loading @@ -8404,10 +8333,11 @@ static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason) /* * The host physical addresses of some pages of guest memory * are loaded into VMCS02 (e.g. L1's Virtual APIC Page). The CPU * may write to these pages via their host physical address while * L2 is running, bypassing any address-translation-based dirty * tracking (e.g. EPT write protection). * are loaded into the vmcs02 (e.g. vmcs12's Virtual APIC * Page). The CPU may write to these pages via their host * physical address while L2 is running, bypassing any * address-translation-based dirty tracking (e.g. EPT write * protection). * * Mark them dirty on every exit from L2 to prevent them from * getting out of sync with dirty tracking. Loading Loading @@ -8941,7 +8871,7 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) } vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control); vmx_set_msr_bitmap(vcpu); vmx_update_msr_bitmap(vcpu); } static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa) Loading Loading @@ -9602,6 +9532,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) { int err; struct vcpu_vmx *vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); unsigned long *msr_bitmap; int cpu; if (!vmx) Loading Loading @@ -9634,13 +9565,20 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) if (!vmx->guest_msrs) goto free_pml; vmx->loaded_vmcs = &vmx->vmcs01; vmx->loaded_vmcs->vmcs = alloc_vmcs(); vmx->loaded_vmcs->shadow_vmcs = NULL; if (!vmx->loaded_vmcs->vmcs) err = alloc_loaded_vmcs(&vmx->vmcs01); if (err < 0) goto free_msrs; loaded_vmcs_init(vmx->loaded_vmcs); msr_bitmap = vmx->vmcs01.msr_bitmap; vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW); vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW); vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW); vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW); vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW); vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW); vmx->msr_bitmap_mode = 0; vmx->loaded_vmcs = &vmx->vmcs01; cpu = get_cpu(); vmx_vcpu_load(&vmx->vcpu, cpu); vmx->vcpu.cpu = cpu; Loading Loading @@ -10103,7 +10041,7 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, int msr; struct page *page; unsigned long *msr_bitmap_l1; unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.msr_bitmap; unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap; /* This shortcut is ok because we support only x2APIC MSRs so far. */ if (!nested_cpu_has_virt_x2apic_mode(vmcs12)) Loading Loading @@ -10680,6 +10618,9 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, if (kvm_has_tsc_control) decache_tsc_multiplier(vmx); if (cpu_has_vmx_msr_bitmap()) vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap)); if (enable_vpid) { /* * There is no direct mapping between vpid02 and vpid12, the Loading Loading @@ -10901,20 +10842,15 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry) { struct vcpu_vmx *vmx = to_vmx(vcpu); struct vmcs12 *vmcs12 = get_vmcs12(vcpu); struct loaded_vmcs *vmcs02; u32 msr_entry_idx; u32 exit_qual; vmcs02 = nested_get_current_vmcs02(vmx); if (!vmcs02) return -ENOMEM; enter_guest_mode(vcpu); if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); vmx_switch_vmcs(vcpu, vmcs02); vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02); vmx_segment_cache_clear(vmx); if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &exit_qual)) { Loading Loading @@ -11483,7 +11419,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, vmcs_write64(GUEST_IA32_DEBUGCTL, 0); if (cpu_has_vmx_msr_bitmap()) vmx_set_msr_bitmap(vcpu); vmx_update_msr_bitmap(vcpu); if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr, vmcs12->vm_exit_msr_load_count)) Loading Loading @@ -11532,10 +11468,6 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, vm_exit_controls_reset_shadow(vmx); vmx_segment_cache_clear(vmx); /* if no vmcs02 cache requested, remove the one we used */ if (VMCS02_POOL_SIZE == 0) nested_free_vmcs02(vmx, vmx->nested.current_vmptr); /* Update any VMCS fields that might have changed while L2 ran */ vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr); vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr); Loading