Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 6bdb913f authored by Haggai Eran, committed by Linus Torvalds
Browse files

mm: wrap calls to set_pte_at_notify with invalidate_range_start and invalidate_range_end



In order to allow sleeping during invalidate_page mmu notifier calls, we
need to avoid calling it while holding the PT lock.  In addition to its direct
calls, invalidate_page can also be called as a substitute for a change_pte
call, in case the notifier client hasn't implemented change_pte.

This patch drops the invalidate_page call from change_pte, and instead
wraps all calls to change_pte with invalidate_range_start and
invalidate_range_end calls.

Note that change_pte still cannot sleep after this patch, and that clients
implementing change_pte should not take action on it in case the number of
outstanding invalidate_range_start calls is larger than one, otherwise
they might miss a later invalidation.

Signed-off-by: Haggai Eran <haggaie@mellanox.com>
Cc: Andrea Arcangeli <andrea@qumranet.com>
Cc: Sagi Grimberg <sagig@mellanox.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Cc: Or Gerlitz <ogerlitz@mellanox.com>
Cc: Haggai Eran <haggaie@mellanox.com>
Cc: Shachar Raindel <raindel@mellanox.com>
Cc: Liran Liss <liranl@mellanox.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Avi Kivity <avi@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 2ec74c3e
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -141,10 +141,14 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
	spinlock_t *ptl;
	pte_t *ptep;
	int err;
	/* For mmu_notifiers */
	const unsigned long mmun_start = addr;
	const unsigned long mmun_end   = addr + PAGE_SIZE;

	/* For try_to_free_swap() and munlock_vma_page() below */
	lock_page(page);

	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
	err = -EAGAIN;
	ptep = page_check_address(page, mm, addr, &ptl, 0);
	if (!ptep)
@@ -173,6 +177,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,

	err = 0;
 unlock:
	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
	unlock_page(page);
	return err;
}
+19 −2
Original line number Diff line number Diff line
@@ -709,15 +709,22 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page,
	spinlock_t *ptl;
	int swapped;
	int err = -EFAULT;
	unsigned long mmun_start;	/* For mmu_notifiers */
	unsigned long mmun_end;		/* For mmu_notifiers */

	addr = page_address_in_vma(page, vma);
	if (addr == -EFAULT)
		goto out;

	BUG_ON(PageTransCompound(page));

	mmun_start = addr;
	mmun_end   = addr + PAGE_SIZE;
	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);

	ptep = page_check_address(page, mm, addr, &ptl, 0);
	if (!ptep)
		goto out;
		goto out_mn;

	if (pte_write(*ptep) || pte_dirty(*ptep)) {
		pte_t entry;
@@ -752,6 +759,8 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page,

out_unlock:
	pte_unmap_unlock(ptep, ptl);
out_mn:
	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
out:
	return err;
}
@@ -776,6 +785,8 @@ static int replace_page(struct vm_area_struct *vma, struct page *page,
	spinlock_t *ptl;
	unsigned long addr;
	int err = -EFAULT;
	unsigned long mmun_start;	/* For mmu_notifiers */
	unsigned long mmun_end;		/* For mmu_notifiers */

	addr = page_address_in_vma(page, vma);
	if (addr == -EFAULT)
@@ -794,10 +805,14 @@ static int replace_page(struct vm_area_struct *vma, struct page *page,
	if (!pmd_present(*pmd))
		goto out;

	mmun_start = addr;
	mmun_end   = addr + PAGE_SIZE;
	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);

	ptep = pte_offset_map_lock(mm, pmd, addr, &ptl);
	if (!pte_same(*ptep, orig_pte)) {
		pte_unmap_unlock(ptep, ptl);
		goto out;
		goto out_mn;
	}

	get_page(kpage);
@@ -814,6 +829,8 @@ static int replace_page(struct vm_area_struct *vma, struct page *page,

	pte_unmap_unlock(ptep, ptl);
	err = 0;
out_mn:
	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
out:
	return err;
}
+12 −6
Original line number Diff line number Diff line
@@ -2527,6 +2527,9 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
	int ret = 0;
	int page_mkwrite = 0;
	struct page *dirty_page = NULL;
	unsigned long mmun_start;	/* For mmu_notifiers */
	unsigned long mmun_end;		/* For mmu_notifiers */
	bool mmun_called = false;	/* For mmu_notifiers */

	old_page = vm_normal_page(vma, address, orig_pte);
	if (!old_page) {
@@ -2704,6 +2707,11 @@ gotten:
	if (mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))
		goto oom_free_new;

	mmun_start  = address & PAGE_MASK;
	mmun_end    = (address & PAGE_MASK) + PAGE_SIZE;
	mmun_called = true;
	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);

	/*
	 * Re-check the pte - we dropped the lock
	 */
@@ -2766,14 +2774,12 @@ gotten:
	} else
		mem_cgroup_uncharge_page(new_page);

	if (new_page)
		page_cache_release(new_page);
unlock:
	pte_unmap_unlock(page_table, ptl);
	if (new_page) {
		if (new_page == old_page)
			/* cow happened, notify before releasing old_page */
			mmu_notifier_invalidate_page(mm, address);
		page_cache_release(new_page);
	}
	if (mmun_called)
		mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
	if (old_page) {
		/*
		 * Don't let another task, with possibly unlocked vma,
+0 −6
Original line number Diff line number Diff line
@@ -137,12 +137,6 @@ void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address,
	hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
		if (mn->ops->change_pte)
			mn->ops->change_pte(mn, mm, address, pte);
		/*
		 * Some drivers don't have change_pte,
		 * so we must call invalidate_page in that case.
		 */
		else if (mn->ops->invalidate_page)
			mn->ops->invalidate_page(mn, mm, address);
	}
	srcu_read_unlock(&srcu, id);
}