Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 1609d760 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull kvm fixes from Paolo Bonzini:
 "The main change here is a revert of reverts. We recently simplified
  some code that was thought unnecessary; however, since then KVM has
  grown quite a few cond_resched()s and for that reason the simplified
  code is prone to livelocks---one CPUs tries to empty a list of guest
  page tables while the others keep adding to them. This adds back the
  generation-based zapping of guest page tables, which was not
  unnecessary after all.

  On top of this, there is a fix for a kernel memory leak and a couple
  of s390 fixlets as well"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: x86/mmu: Reintroduce fast invalidate/zap for flushing memslot
  KVM: x86: work around leak of uninitialized stack contents
  KVM: nVMX: handle page fault in vmread
  KVM: s390: Do not leak kernel stack data in the KVM_S390_INTERRUPT ioctl
  KVM: s390: kvm_s390_vm_start_migration: check dirty_bitmap before using it as target for memset()
parents 1f9c632c a9c20bb0
Loading
Loading
Loading
Loading
+10 −0
Original line number Diff line number Diff line
@@ -1961,6 +1961,16 @@ int s390int_to_s390irq(struct kvm_s390_interrupt *s390int,
	case KVM_S390_MCHK:
		irq->u.mchk.mcic = s390int->parm64;
		break;
	case KVM_S390_INT_PFAULT_INIT:
		irq->u.ext.ext_params = s390int->parm;
		irq->u.ext.ext_params2 = s390int->parm64;
		break;
	case KVM_S390_RESTART:
	case KVM_S390_INT_CLOCK_COMP:
	case KVM_S390_INT_CPU_TIMER:
		break;
	default:
		return -EINVAL;
	}
	return 0;
}
+3 −1
Original line number Diff line number Diff line
@@ -1018,6 +1018,8 @@ static int kvm_s390_vm_start_migration(struct kvm *kvm)
	/* mark all the pages in active slots as dirty */
	for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
		ms = slots->memslots + slotnr;
		if (!ms->dirty_bitmap)
			return -EINVAL;
		/*
		 * The second half of the bitmap is only used on x86,
		 * and would be wasted otherwise, so we put it to good
@@ -4323,7 +4325,7 @@ long kvm_arch_vcpu_async_ioctl(struct file *filp,
	}
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;
		struct kvm_s390_irq s390irq;
		struct kvm_s390_irq s390irq = {};

		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			return -EFAULT;
+2 −0
Original line number Diff line number Diff line
@@ -335,6 +335,7 @@ struct kvm_mmu_page {
	int root_count;          /* Currently serving as active root */
	unsigned int unsync_children;
	struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */
	unsigned long mmu_valid_gen;
	DECLARE_BITMAP(unsync_child_bitmap, 512);

#ifdef CONFIG_X86_32
@@ -856,6 +857,7 @@ struct kvm_arch {
	unsigned long n_requested_mmu_pages;
	unsigned long n_max_mmu_pages;
	unsigned int indirect_shadow_pages;
	unsigned long mmu_valid_gen;
	struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
	/*
	 * Hash table of struct kvm_mmu_page.
+99 −2
Original line number Diff line number Diff line
@@ -2095,6 +2095,12 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, int direct
	if (!direct)
		sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache);
	set_page_private(virt_to_page(sp->spt), (unsigned long)sp);

	/*
	 * active_mmu_pages must be a FIFO list, as kvm_zap_obsolete_pages()
	 * depends on valid pages being added to the head of the list.  See
	 * comments in kvm_zap_obsolete_pages().
	 */
	list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
	kvm_mod_used_mmu_pages(vcpu->kvm, +1);
	return sp;
@@ -2244,7 +2250,7 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
#define for_each_valid_sp(_kvm, _sp, _gfn)				\
	hlist_for_each_entry(_sp,					\
	  &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \
		if ((_sp)->role.invalid) {    \
		if (is_obsolete_sp((_kvm), (_sp)) || (_sp)->role.invalid) {    \
		} else

#define for_each_gfn_indirect_valid_sp(_kvm, _sp, _gfn)			\
@@ -2301,6 +2307,11 @@ static void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) { }
static void mmu_audit_disable(void) { }
#endif

static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
{
	return unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen);
}

static bool kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
			 struct list_head *invalid_list)
{
@@ -2525,6 +2536,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
		if (level > PT_PAGE_TABLE_LEVEL && need_sync)
			flush |= kvm_sync_pages(vcpu, gfn, &invalid_list);
	}
	sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen;
	clear_page(sp->spt);
	trace_kvm_mmu_get_page(sp, true);

@@ -4233,6 +4245,13 @@ static bool fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3,
			return false;

		if (cached_root_available(vcpu, new_cr3, new_role)) {
			/*
			 * It is possible that the cached previous root page is
			 * obsolete because of a change in the MMU generation
			 * number. However, changing the generation number is
			 * accompanied by KVM_REQ_MMU_RELOAD, which will free
			 * the root set here and allocate a new one.
			 */
			kvm_make_request(KVM_REQ_LOAD_CR3, vcpu);
			if (!skip_tlb_flush) {
				kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
@@ -5649,11 +5668,89 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu)
	return alloc_mmu_pages(vcpu);
}


static void kvm_zap_obsolete_pages(struct kvm *kvm)
{
	struct kvm_mmu_page *sp, *node;
	LIST_HEAD(invalid_list);
	int ign;

restart:
	list_for_each_entry_safe_reverse(sp, node,
	      &kvm->arch.active_mmu_pages, link) {
		/*
		 * No obsolete valid page exists before a newly created page
		 * since active_mmu_pages is a FIFO list.
		 */
		if (!is_obsolete_sp(kvm, sp))
			break;

		/*
		 * Do not repeatedly zap a root page to avoid unnecessary
		 * KVM_REQ_MMU_RELOAD, otherwise we may not be able to
		 * progress:
		 *    vcpu 0                        vcpu 1
		 *                         call vcpu_enter_guest():
		 *                            1): handle KVM_REQ_MMU_RELOAD
		 *                                and require mmu-lock to
		 *                                load mmu
		 * repeat:
		 *    1): zap root page and
		 *        send KVM_REQ_MMU_RELOAD
		 *
		 *    2): if (cond_resched_lock(mmu-lock))
		 *
		 *                            2): hold mmu-lock and load mmu
		 *
		 *                            3): see KVM_REQ_MMU_RELOAD bit
		 *                                on vcpu->requests is set
		 *                                then return 1 to call
		 *                                vcpu_enter_guest() again.
		 *            goto repeat;
		 *
		 * Since we are reversely walking the list and the invalid
		 * list will be moved to the head, skip the invalid page
		 * can help us to avoid the infinity list walking.
		 */
		if (sp->role.invalid)
			continue;

		if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
			kvm_mmu_commit_zap_page(kvm, &invalid_list);
			cond_resched_lock(&kvm->mmu_lock);
			goto restart;
		}

		if (__kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, &ign))
			goto restart;
	}

	kvm_mmu_commit_zap_page(kvm, &invalid_list);
}

/*
 * Fast invalidate all shadow pages and use lock-break technique
 * to zap obsolete pages.
 *
 * It's required when memslot is being deleted or VM is being
 * destroyed, in these cases, we should ensure that KVM MMU does
 * not use any resource of the being-deleted slot or all slots
 * after calling the function.
 */
static void kvm_mmu_zap_all_fast(struct kvm *kvm)
{
	spin_lock(&kvm->mmu_lock);
	kvm->arch.mmu_valid_gen++;

	kvm_zap_obsolete_pages(kvm);
	spin_unlock(&kvm->mmu_lock);
}

static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm,
			struct kvm_memory_slot *slot,
			struct kvm_page_track_notifier_node *node)
{
	kvm_mmu_zap_all(kvm);
	kvm_mmu_zap_all_fast(kvm);
}

void kvm_mmu_init_vm(struct kvm *kvm)
+3 −1
Original line number Diff line number Diff line
@@ -4540,6 +4540,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
	int len;
	gva_t gva = 0;
	struct vmcs12 *vmcs12;
	struct x86_exception e;
	short offset;

	if (!nested_vmx_check_permission(vcpu))
@@ -4588,7 +4589,8 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
				vmx_instruction_info, true, len, &gva))
			return 1;
		/* _system ok, nested_vmx_check_permission has verified cpl=0 */
		kvm_write_guest_virt_system(vcpu, gva, &field_value, len, NULL);
		if (kvm_write_guest_virt_system(vcpu, gva, &field_value, len, &e))
			kvm_inject_page_fault(vcpu, &e);
	}

	return nested_vmx_succeed(vcpu);
Loading