
Commit 3cfc37dc authored by Laurent Dufour, committed by Gerrit - the friendly Code Review server

mm: protect VMA modifications using VMA sequence count



The VMA sequence count has been introduced to allow fast detection of
VMA modification when running a page fault handler without holding
the mmap_sem.
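
The vm_write_begin()/vm_write_end() helpers used throughout the diff
below are introduced by an earlier patch in this series. As a rough,
illustrative sketch only (assuming the seqcount_t field the series adds
to struct vm_area_struct, here called vm_sequence; not the exact
upstream definition):

	/* Bracket a VMA update so lock-free readers can detect it. */
	static inline void vm_write_begin(struct vm_area_struct *vma)
	{
		/* makes the sequence count odd while the update runs */
		write_seqcount_begin(&vma->vm_sequence);
	}

	static inline void vm_write_end(struct vm_area_struct *vma)
	{
		/* makes the count even again; readers see it changed */
		write_seqcount_end(&vma->vm_sequence);
	}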

This patch provides protection against the VMA modifications done in:
	- madvise()
	- mpol_rebind_policy()
	- vma_replace_policy()
	- change_prot_numa()
	- mlock(), munlock()
	- mprotect()
	- mmap_region()
	- collapse_huge_page()
	- userfaultfd registering services

In addition, VMA fields which will be read during the speculative fault
path need to be written using WRITE_ONCE() to prevent the writes from
being torn and intermediate values from becoming visible to other CPUs.
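
On the read side, a speculative page fault handler would sample the
sequence count, read the fields with matching READ_ONCE() calls, and
fall back to the mmap_sem path if a writer ran concurrently. A hedged
sketch under the same assumptions (vma_flags_snapshot() is a
hypothetical helper, not part of this patch):

	static bool vma_flags_snapshot(struct vm_area_struct *vma,
				       unsigned long *flags)
	{
		unsigned int seq = raw_read_seqcount(&vma->vm_sequence);

		/* odd count: a writer is inside vm_write_begin()/end() */
		if (seq & 1)
			return false;

		/* pairs with the WRITE_ONCE() stores added below */
		*flags = READ_ONCE(vma->vm_flags);

		/* read_seqcount_retry() is true if the VMA changed meanwhile */
		return !read_seqcount_retry(&vma->vm_sequence, seq);
	}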

Change-Id: Ic36046b7254e538b6baf7144c50ae577ee7f2074
Signed-off-by: Laurent Dufour <ldufour@linux.vnet.ibm.com>
Patch-mainline: linux-mm @ Tue, 17 Apr 2018 16:33:15
[vinmenon@codeaurora.org: trivial merge conflict fixes]
Signed-off-by: Vinayak Menon <vinmenon@codeaurora.org>
[charante@codeaurora.org: trivial merge conflict fixes]
Signed-off-by: Charan Teja Reddy <charante@codeaurora.org>
parent 88a78dc2
fs/proc/task_mmu.c +4 −1
@@ -1201,8 +1201,11 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 					goto out_mm;
 				}
 				for (vma = mm->mmap; vma; vma = vma->vm_next) {
-					vma->vm_flags &= ~VM_SOFTDIRTY;
+					vm_write_begin(vma);
+					WRITE_ONCE(vma->vm_flags,
+						vma->vm_flags & ~VM_SOFTDIRTY);
 					vma_set_page_prot(vma);
+					vm_write_end(vma);
 				}
 				downgrade_write(&mm->mmap_sem);
 				break;
fs/userfaultfd.c +13 −4
@@ -664,8 +664,11 @@ int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs)
 
 	octx = vma->vm_userfaultfd_ctx.ctx;
 	if (!octx || !(octx->features & UFFD_FEATURE_EVENT_FORK)) {
+		vm_write_begin(vma);
 		vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
-		vma->vm_flags &= ~(VM_UFFD_WP | VM_UFFD_MISSING);
+		WRITE_ONCE(vma->vm_flags,
+			   vma->vm_flags & ~(VM_UFFD_WP | VM_UFFD_MISSING));
+		vm_write_end(vma);
 		return 0;
 	}

@@ -904,8 +907,10 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
 			vma = prev;
 		else
 			prev = vma;
-		vma->vm_flags = new_flags;
+		vm_write_begin(vma);
+		WRITE_ONCE(vma->vm_flags, new_flags);
 		vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
+		vm_write_end(vma);
 	}
 	up_write(&mm->mmap_sem);
 	mmput(mm);
@@ -1468,8 +1473,10 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
 		 * the next vma was merged into the current one and
 		 * the current one has not been updated yet.
 		 */
-		vma->vm_flags = new_flags;
+		vm_write_begin(vma);
+		WRITE_ONCE(vma->vm_flags, new_flags);
 		vma->vm_userfaultfd_ctx.ctx = ctx;
+		vm_write_end(vma);
 
 	skip:
 		prev = vma;
@@ -1629,8 +1636,10 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
 		 * the next vma was merged into the current one and
 		 * the current one has not been updated yet.
 		 */
-		vma->vm_flags = new_flags;
+		vm_write_begin(vma);
+		WRITE_ONCE(vma->vm_flags, new_flags);
 		vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
+		vm_write_end(vma);
 
 	skip:
 		prev = vma;
mm/khugepaged.c +3 −0
@@ -1012,6 +1012,7 @@ static void collapse_huge_page(struct mm_struct *mm,
 	if (mm_find_pmd(mm, address) != pmd)
 		goto out;
 
+	vm_write_begin(vma);
 	anon_vma_lock_write(vma->anon_vma);
 
 	pte = pte_offset_map(pmd, address);
@@ -1047,6 +1048,7 @@ static void collapse_huge_page(struct mm_struct *mm,
 		pmd_populate(mm, pmd, pmd_pgtable(_pmd));
 		spin_unlock(pmd_ptl);
 		anon_vma_unlock_write(vma->anon_vma);
+		vm_write_end(vma);
 		result = SCAN_FAIL;
 		goto out;
 	}
@@ -1081,6 +1083,7 @@ static void collapse_huge_page(struct mm_struct *mm,
 	set_pmd_at(mm, address, pmd, _pmd);
 	update_mmu_cache_pmd(vma, address, pmd);
 	spin_unlock(pmd_ptl);
+	vm_write_end(vma);
 
 	*hpage = NULL;
 
mm/madvise.c +5 −1
@@ -184,7 +184,9 @@ static long madvise_behavior(struct vm_area_struct *vma,
 	/*
 	 * vm_flags is protected by the mmap_sem held in write mode.
 	 */
-	vma->vm_flags = new_flags;
+	vm_write_begin(vma);
+	WRITE_ONCE(vma->vm_flags, new_flags);
+	vm_write_end(vma);
 out:
 	return error;
 }
@@ -450,9 +452,11 @@ static void madvise_free_page_range(struct mmu_gather *tlb,
 		.private = tlb,
 	};
 
+	vm_write_begin(vma);
 	tlb_start_vma(tlb, vma);
 	walk_page_range(addr, end, &free_walk);
 	tlb_end_vma(tlb, vma);
+	vm_write_end(vma);
 }
 
 static int madvise_free_single_vma(struct vm_area_struct *vma,
mm/mempolicy.c +34 −17
@@ -380,8 +380,11 @@ void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new)
 	struct vm_area_struct *vma;
 
 	down_write(&mm->mmap_sem);
-	for (vma = mm->mmap; vma; vma = vma->vm_next)
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		vm_write_begin(vma);
 		mpol_rebind_policy(vma->vm_policy, new);
+		vm_write_end(vma);
+	}
 	up_write(&mm->mmap_sem);
 }
 
@@ -554,9 +557,11 @@ unsigned long change_prot_numa(struct vm_area_struct *vma,
 {
 	int nr_updated;
 
+	vm_write_begin(vma);
 	nr_updated = change_protection(vma, addr, end, PAGE_NONE, 0, 1);
 	if (nr_updated)
 		count_vm_numa_events(NUMA_PTE_UPDATES, nr_updated);
+	vm_write_end(vma);
 
 	return nr_updated;
 }
@@ -657,6 +662,7 @@ static int vma_replace_policy(struct vm_area_struct *vma,
 	if (IS_ERR(new))
 		return PTR_ERR(new);
 
+	vm_write_begin(vma);
 	if (vma->vm_ops && vma->vm_ops->set_policy) {
 		err = vma->vm_ops->set_policy(vma, new);
 		if (err)
@@ -664,11 +670,17 @@ static int vma_replace_policy(struct vm_area_struct *vma,
 	}
 
 	old = vma->vm_policy;
-	vma->vm_policy = new; /* protected by mmap_sem */
+	/*
+	 * The speculative page fault handler accesses this field without
+	 * holding the mmap_sem.
+	 */
+	WRITE_ONCE(vma->vm_policy, new);
+	vm_write_end(vma);
 	mpol_put(old);
 
 	return 0;
 err_out:
+	vm_write_end(vma);
 	mpol_put(new);
 	return err;
 }
@@ -1615,14 +1627,20 @@ COMPAT_SYSCALL_DEFINE4(migrate_pages, compat_pid_t, pid,
 struct mempolicy *__get_vma_policy(struct vm_area_struct *vma,
 						unsigned long addr)
 {
-	struct mempolicy *pol = NULL;
+	struct mempolicy *pol;
 
-	if (vma) {
-		if (vma->vm_ops && vma->vm_ops->get_policy) {
-			pol = vma->vm_ops->get_policy(vma, addr);
-		} else if (vma->vm_policy) {
-			pol = vma->vm_policy;
+	if (!vma)
+		return NULL;
+
+	if (vma->vm_ops && vma->vm_ops->get_policy)
+		return vma->vm_ops->get_policy(vma, addr);
+
+	/*
+	 * This could be called without holding the mmap_sem in the
+	 * speculative page fault handler's path.
+	 */
+	pol = READ_ONCE(vma->vm_policy);
+	if (pol) {
 		/*
 		 * shmem_alloc_page() passes MPOL_F_SHARED policy with
 		 * a pseudo vma whose vma->vm_ops=NULL. Take a reference
@@ -1632,7 +1650,6 @@ struct mempolicy *__get_vma_policy(struct vm_area_struct *vma,
 		if (mpol_needs_cond_ref(pol))
 			mpol_get(pol);
 	}
-	}
 
 	return pol;
 }