Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit ff04da7d authored by Laurent Dufour, committed by Vinayak Menon
Browse files

mm: cache some VMA fields in the vm_fault structure



When handling a speculative page fault, the vma->vm_flags and
vma->vm_page_prot fields are read once the page table lock is released, so
there is no longer any guarantee that these fields will not change behind
our back. They are therefore saved in the vm_fault structure (struct
fault_env in this 4.9 port) before the VMA is checked for changes.

This patch also sets the fields in hugetlb_no_page() and
__collapse_huge_page_swapin() even though the callee does not need them.

Signed-off-by: Laurent Dufour <ldufour@linux.vnet.ibm.com>
Change-Id: I9821f02ea32ef220b57b8bfd817992bbf71bbb1d
Patch-mainline: linux-mm @ Tue, 17 Apr 2018 16:33:18
[vinmenon@codeaurora.org: 4.9 porting conflict fixes]
Signed-off-by: Vinayak Menon <vinmenon@codeaurora.org>
parent e032f8a3
Loading
Loading
Loading
Loading
+8 −2
Original line number Diff line number Diff line
@@ -341,6 +341,12 @@ struct fault_env {
					 * page table to avoid allocation from
					 * atomic context.
					 */
	/*
	 * These entries are required when handling speculative page fault.
	 * This way the page handling is done using consistent field values.
	 */
	unsigned long vma_flags;
	pgprot_t vma_page_prot;
};

/*
@@ -623,9 +629,9 @@ void free_compound_page(struct page *page);
 * pte_mkwrite.  But get_user_pages can cause write faults for mappings
 * that do not have writing enabled, when used by access_process_vm.
 */
/*
 * maybe_mkwrite - make a pte writable only if the mapping's flags allow it.
 *
 * Returns pte passed through pte_mkwrite() when VM_WRITE is set in the
 * flags being tested; otherwise returns pte unchanged.
 *
 * NOTE(review): this hunk is rendered without +/- markers, so the signature
 * and the VM_WRITE test each appear twice. The first line of each pair takes
 * a struct vm_area_struct pointer and reads vma->vm_flags; the second takes
 * the flags value directly as vma_flags. Presumably the first of each pair
 * is the removed line and the second the added one -- confirm against the
 * original commit.
 */
static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
static inline pte_t maybe_mkwrite(pte_t pte, unsigned long vma_flags)
{
	if (likely(vma->vm_flags & VM_WRITE))
	if (likely(vma_flags & VM_WRITE))
		pte = pte_mkwrite(pte);
	return pte;
}
+3 −3
Original line number Diff line number Diff line
@@ -957,8 +957,8 @@ static int do_huge_pmd_wp_page_fallback(struct fault_env *fe, pmd_t orig_pmd,

	for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) {
		pte_t entry;
		entry = mk_pte(pages[i], vma->vm_page_prot);
		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
		entry = mk_pte(pages[i], fe->vma_page_prot);
		entry = maybe_mkwrite(pte_mkdirty(entry), fe->vma_flags);
		memcg = (void *)page_private(pages[i]);
		set_page_private(pages[i], 0);
		page_add_new_anon_rmap(pages[i], fe->vma, haddr, false);
@@ -1678,7 +1678,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
				entry = pte_swp_mksoft_dirty(entry);
		} else {
			entry = mk_pte(page + i, READ_ONCE(vma->vm_page_prot));
			entry = maybe_mkwrite(entry, vma);
			entry = maybe_mkwrite(entry, vma->vm_flags);
			if (!write)
				entry = pte_wrprotect(entry);
			if (!young)
+2 −0
Original line number Diff line number Diff line
@@ -887,6 +887,8 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
		.address = address,
		.flags = FAULT_FLAG_ALLOW_RETRY,
		.pmd = pmd,
		.vma_flags = vma->vm_flags,
		.vma_page_prot = vma->vm_page_prot,
	};

	/* we only decide to swapin, if there is enough young ptes */
+23 −21
Original line number Diff line number Diff line
@@ -2116,7 +2116,7 @@ static inline int wp_page_reuse(struct fault_env *fe, pte_t orig_pte,

	flush_cache_page(vma, fe->address, pte_pfn(orig_pte));
	entry = pte_mkyoung(orig_pte);
	entry = maybe_mkwrite(pte_mkdirty(entry), vma);
	entry = maybe_mkwrite(pte_mkdirty(entry), fe->vma_flags);
	if (ptep_set_access_flags(vma, fe->address, fe->pte, entry, 1))
		update_mmu_cache(vma, fe->address, fe->pte);
	pte_unmap_unlock(fe->pte, fe->ptl);
@@ -2218,8 +2218,8 @@ static int wp_page_copy(struct fault_env *fe, pte_t orig_pte,
			inc_mm_counter_fast(mm, MM_ANONPAGES);
		}
		flush_cache_page(vma, fe->address, pte_pfn(orig_pte));
		entry = mk_pte(new_page, vma->vm_page_prot);
		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
		entry = mk_pte(new_page, fe->vma_page_prot);
		entry = maybe_mkwrite(pte_mkdirty(entry), fe->vma_flags);
		/*
		 * Clear the pte entry and flush it first, before updating the
		 * pte with the new entry. This will avoid a race condition
@@ -2280,7 +2280,7 @@ static int wp_page_copy(struct fault_env *fe, pte_t orig_pte,
		 * Don't let another task, with possibly unlocked vma,
		 * keep the mlocked page.
		 */
		if (page_copied && (vma->vm_flags & VM_LOCKED)) {
		if (page_copied && (fe->vma_flags & VM_LOCKED)) {
			lock_page(old_page);	/* LRU manipulation */
			if (PageMlocked(old_page))
				munlock_vma_page(old_page);
@@ -2407,7 +2407,7 @@ static int do_wp_page(struct fault_env *fe, pte_t orig_pte)
		 * We should not cow pages in a shared writeable mapping.
		 * Just mark the pages writable and/or call ops->pfn_mkwrite.
		 */
		if ((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
		if ((fe->vma_flags & (VM_WRITE|VM_SHARED)) ==
				     (VM_WRITE|VM_SHARED))
			return wp_pfn_shared(fe, orig_pte);

@@ -2453,7 +2453,7 @@ static int do_wp_page(struct fault_env *fe, pte_t orig_pte)
			return wp_page_reuse(fe, orig_pte, old_page, 0, 0);
		}
		unlock_page(old_page);
	} else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
	} else if (unlikely((fe->vma_flags & (VM_WRITE|VM_SHARED)) ==
					(VM_WRITE|VM_SHARED))) {
		return wp_page_shared(fe, orig_pte, old_page);
	}
@@ -2683,9 +2683,9 @@ int do_swap_page(struct fault_env *fe, pte_t orig_pte)

	inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
	dec_mm_counter_fast(vma->vm_mm, MM_SWAPENTS);
	pte = mk_pte(page, vma->vm_page_prot);
	pte = mk_pte(page, fe->vma_page_prot);
	if ((fe->flags & FAULT_FLAG_WRITE) && reuse_swap_page(page, NULL)) {
		pte = maybe_mkwrite(pte_mkdirty(pte), vma);
		pte = maybe_mkwrite(pte_mkdirty(pte), fe->vma_flags);
		fe->flags &= ~FAULT_FLAG_WRITE;
		ret |= VM_FAULT_WRITE;
		exclusive = RMAP_EXCLUSIVE;
@@ -2706,7 +2706,7 @@ int do_swap_page(struct fault_env *fe, pte_t orig_pte)

	swap_free(entry);
	if (mem_cgroup_swap_full(page) ||
	    (vma->vm_flags & VM_LOCKED) || PageMlocked(page))
	    (fe->vma_flags & VM_LOCKED) || PageMlocked(page))
		try_to_free_swap(page);
	unlock_page(page);
	if (page != swapcache) {
@@ -2764,7 +2764,7 @@ static int do_anonymous_page(struct fault_env *fe)
	pte_t entry;

	/* File mapping without ->vm_ops ? */
	if (vma->vm_flags & VM_SHARED)
	if (fe->vma_flags & VM_SHARED)
		return VM_FAULT_SIGBUS;

	/*
@@ -2788,7 +2788,7 @@ static int do_anonymous_page(struct fault_env *fe)
	if (!(fe->flags & FAULT_FLAG_WRITE) &&
			!mm_forbids_zeropage(vma->vm_mm)) {
		entry = pte_mkspecial(pfn_pte(my_zero_pfn(fe->address),
						vma->vm_page_prot));
						fe->vma_page_prot));
		if (!pte_map_lock(vma->vm_mm, fe))
			return VM_FAULT_RETRY;
		if (!pte_none(*fe->pte))
@@ -2818,8 +2818,8 @@ static int do_anonymous_page(struct fault_env *fe)
	 */
	__SetPageUptodate(page);

	entry = mk_pte(page, vma->vm_page_prot);
	if (vma->vm_flags & VM_WRITE)
	entry = mk_pte(page, fe->vma_page_prot);
	if (fe->vma_flags & VM_WRITE)
		entry = pte_mkwrite(pte_mkdirty(entry));

	if (!pte_map_lock(vma->vm_mm, fe)) {
@@ -3000,7 +3000,7 @@ static int do_set_pmd(struct fault_env *fe, struct page *page)
	for (i = 0; i < HPAGE_PMD_NR; i++)
		flush_icache_page(vma, page + i);

	entry = mk_huge_pmd(page, vma->vm_page_prot);
	entry = mk_huge_pmd(page, fe->vma_page_prot);
	if (write)
		entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);

@@ -3068,11 +3068,11 @@ int alloc_set_pte(struct fault_env *fe, struct mem_cgroup *memcg,
		return VM_FAULT_NOPAGE;

	flush_icache_page(vma, page);
	entry = mk_pte(page, vma->vm_page_prot);
	entry = mk_pte(page, fe->vma_page_prot);
	if (write)
		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
		entry = maybe_mkwrite(pte_mkdirty(entry), fe->vma_flags);
	/* copy-on-write page */
	if (write && !(vma->vm_flags & VM_SHARED)) {
	if (write && !(fe->vma_flags & VM_SHARED)) {
		inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
		page_add_new_anon_rmap(page, vma, fe->address, false);
		mem_cgroup_commit_charge(page, memcg, false, false);
@@ -3364,7 +3364,7 @@ static int do_fault(struct fault_env *fe)
		return VM_FAULT_SIGBUS;
	if (!(fe->flags & FAULT_FLAG_WRITE))
		return do_read_fault(fe, pgoff);
	if (!(vma->vm_flags & VM_SHARED))
	if (!(fe->vma_flags & VM_SHARED))
		return do_cow_fault(fe, pgoff);
	return do_shared_fault(fe, pgoff);
}
@@ -3412,7 +3412,7 @@ static int do_numa_page(struct fault_env *fe, pte_t pte)
	}

	/* Make it present again */
	pte = pte_modify(pte, vma->vm_page_prot);
	pte = pte_modify(pte, fe->vma_page_prot);
	pte = pte_mkyoung(pte);
	if (was_writable)
		pte = pte_mkwrite(pte);
@@ -3446,7 +3446,7 @@ static int do_numa_page(struct fault_env *fe, pte_t pte)
	 * Flag if the page is shared between multiple address spaces. This
	 * is later used when determining whether to group tasks together
	 */
	if (page_mapcount(page) > 1 && (vma->vm_flags & VM_SHARED))
	if (page_mapcount(page) > 1 && (fe->vma_flags & VM_SHARED))
		flags |= TNF_SHARED;

	last_cpupid = page_cpupid_last(page);
@@ -3493,7 +3493,7 @@ static int wp_huge_pmd(struct fault_env *fe, pmd_t orig_pmd)
				fe->flags);

	/* COW handled on pte level: split pmd */
	VM_BUG_ON_VMA(fe->vma->vm_flags & VM_SHARED, fe->vma);
	VM_BUG_ON_VMA(fe->vma_flags & VM_SHARED, fe->vma);
	split_huge_pmd(fe->vma, fe->pmd, fe->address);

	return VM_FAULT_FALLBACK;
@@ -3614,6 +3614,8 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
		.vma = vma,
		.address = address,
		.flags = flags,
		.vma_flags = vma->vm_flags,
		.vma_page_prot = vma->vm_page_prot,
	};
	struct mm_struct *mm = vma->vm_mm;
	pgd_t *pgd;
+1 −1
Original line number Diff line number Diff line
@@ -241,7 +241,7 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,

	/* Recheck VMA as permissions can change since migration started  */
	if (is_write_migration_entry(entry))
		pte = maybe_mkwrite(pte, vma);
		pte = maybe_mkwrite(pte, vma->vm_flags);

#ifdef CONFIG_HUGETLB_PAGE
	if (PageHuge(new)) {