Merge "ANDROID: mm/filemap: Fix missing put_page() for speculative page fault" (6e5a826a) · Commits · e / devices / android_kernel_fairphone_FP5

include/linux/mm.h

+6 −0

Original line number	Diff line number	Diff line
		@@ -1542,6 +1542,12 @@ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
		#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
		static inline void vm_write_begin(struct vm_area_struct *vma)
		{
		/*
		* Isolated vma might be freed without exclusive mmap_lock but
		* speculative page fault handler still needs to know it was changed.
		*/
		if (!RB_EMPTY_NODE(&vma->vm_rb))
		WARN_ON_ONCE(!rwsem_is_locked(&(vma->vm_mm)->mmap_sem));
		/*
		* The read side never spins, so disabling
		* preemption is not required.

mm/filemap.c

+49 −1

Original line number	Diff line number	Diff line
		@@ -2495,7 +2495,9 @@ static struct file *do_async_mmap_readahead(struct vm_fault *vmf,
		* it in the page cache, and handles the special cases reasonably without
		* having a lot of duplicated code.
		*
		* vma->vm_mm->mmap_sem must be held on entry (except FAULT_FLAG_SPECULATIVE).
		* If FAULT_FLAG_SPECULATIVE is set, this function runs with elevated vma
		* refcount and with mmap lock not held.
		* Otherwise, vma->vm_mm->mmap_sem must be held on entry.
		*
		* If our return value has VM_FAULT_RETRY set, it's because the mmap_sem
		* may be dropped before doing I/O or by lock_page_maybe_drop_mmap().
		@@ -2520,6 +2522,52 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
		struct page *page;
		vm_fault_t ret = 0;

		if (vmf->flags & FAULT_FLAG_SPECULATIVE) {
		page = find_get_page(mapping, offset);
		if (unlikely(!page))
		return VM_FAULT_RETRY;

		if (unlikely(PageReadahead(page)))
		goto page_put;

		if (!trylock_page(page))
		goto page_put;

		if (unlikely(compound_head(page)->mapping != mapping))
		goto page_unlock;
		VM_BUG_ON_PAGE(page_to_pgoff(page) != offset, page);
		if (unlikely(!PageUptodate(page)))
		goto page_unlock;

		max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
		if (unlikely(offset >= max_off))
		goto page_unlock;

		/*
		* Update readahead mmap_miss statistic.
		*
		* Note that we are not sure if finish_fault() will
		* manage to complete the transaction. If it fails,
		* we'll come back to filemap_fault() non-speculative
		* case which will update mmap_miss a second time.
		* This is not ideal, we would prefer to guarantee the
		* update will happen exactly once.
		*/
		if (!(vmf->vma->vm_flags & VM_RAND_READ) && ra->ra_pages) {
		unsigned int mmap_miss = READ_ONCE(ra->mmap_miss);
		if (mmap_miss)
		WRITE_ONCE(ra->mmap_miss, --mmap_miss);
		}

		vmf->page = page;
		return VM_FAULT_LOCKED;
		page_unlock:
		unlock_page(page);
		page_put:
		put_page(page);
		return VM_FAULT_RETRY;
		}

		max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
		if (unlikely(offset >= max_off))
		return VM_FAULT_SIGBUS;

mm/khugepaged.c

+5 −0

Original line number	Diff line number	Diff line
		@@ -1343,6 +1343,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
		if (!pmd)
		goto drop_hpage;

		vm_write_begin(vma);
		start_pte = pte_offset_map_lock(mm, pmd, haddr, &ptl);

		/* step 1: check all mapped PTEs are to the right huge page */
		@@ -1392,6 +1393,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
		ptl = pmd_lock(vma->vm_mm, pmd);
		_pmd = pmdp_collapse_flush(vma, haddr, pmd);
		spin_unlock(ptl);
		vm_write_end(vma);
		mm_dec_nr_ptes(mm);
		pte_free(mm, pmd_pgtable(_pmd));

		@@ -1402,6 +1404,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)

		abort:
		pte_unmap_unlock(start_pte, ptl);
		vm_write_end(vma);
		goto drop_hpage;
		}

		@@ -1473,10 +1476,12 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
		*/
		if (down_write_trylock(&mm->mmap_sem)) {
		if (!khugepaged_test_exit(mm)) {
		vm_write_begin(vma);
		spinlock_t *ptl = pmd_lock(mm, pmd);
		/* assume page table is clear */
		_pmd = pmdp_collapse_flush(vma, addr, pmd);
		spin_unlock(ptl);
		vm_write_end(vma);
		mm_dec_nr_ptes(mm);
		pte_free(mm, pmd_pgtable(_pmd));
		}

mm/madvise.c

+0 −6

Original line number	Diff line number	Diff line
		@@ -500,11 +500,9 @@ static void madvise_cold_page_range(struct mmu_gather *tlb,
		.target_task = task,
		};

		vm_write_begin(vma);
		tlb_start_vma(tlb, vma);
		walk_page_range(vma->vm_mm, addr, end, &cold_walk_ops, &walk_private);
		tlb_end_vma(tlb, vma);
		vm_write_end(vma);
		}

		static long madvise_cold(struct task_struct *task,
		@@ -538,11 +536,9 @@ static void madvise_pageout_page_range(struct mmu_gather *tlb,
		.target_task = task,
		};

		vm_write_begin(vma);
		tlb_start_vma(tlb, vma);
		walk_page_range(vma->vm_mm, addr, end, &cold_walk_ops, &walk_private);
		tlb_end_vma(tlb, vma);
		vm_write_end(vma);
		}

		static inline bool can_do_pageout(struct vm_area_struct *vma)
		@@ -745,12 +741,10 @@ static int madvise_free_single_vma(struct vm_area_struct *vma,
		update_hiwater_rss(mm);

		mmu_notifier_invalidate_range_start(&range);
		vm_write_begin(vma);
		tlb_start_vma(&tlb, vma);
		walk_page_range(vma->vm_mm, range.start, range.end,
		&madvise_free_walk_ops, &tlb);
		tlb_end_vma(&tlb, vma);
		vm_write_end(vma);
		mmu_notifier_invalidate_range_end(&range);
		tlb_finish_mmu(&tlb, range.start, range.end);

mm/memory.c

+25 −17

Original line number	Diff line number	Diff line
		@@ -1292,7 +1292,6 @@ void unmap_page_range(struct mmu_gather *tlb,
		unsigned long next;

		BUG_ON(addr >= end);
		vm_write_begin(vma);
		tlb_start_vma(tlb, vma);
		pgd = pgd_offset(vma->vm_mm, addr);
		do {
		@@ -1302,7 +1301,6 @@ void unmap_page_range(struct mmu_gather *tlb,
		next = zap_p4d_range(tlb, vma, pgd, addr, next, details);
		} while (pgd++, addr = next, addr != end);
		tlb_end_vma(tlb, vma);
		vm_write_end(vma);
		}


		@@ -3050,6 +3048,11 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
		int exclusive = 0;
		vm_fault_t ret;

		if (vmf->flags & FAULT_FLAG_SPECULATIVE) {
		pte_unmap(vmf->pte);
		return VM_FAULT_RETRY;
		}

		ret = pte_unmap_same(vmf);
		if (ret) {
		/*
		@@ -3296,6 +3299,10 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
		if (vmf->vma_flags & VM_SHARED)
		return VM_FAULT_SIGBUS;

		/* Do not check unstable pmd, if it's changed will retry later */
		if (vmf->flags & FAULT_FLAG_SPECULATIVE)
		goto skip_pmd_checks;

		/*
		* Use pte_alloc() instead of pte_alloc_map(). We can't run
		* pte_offset_map() on pmds where a huge pmd might be created
		@@ -3313,6 +3320,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
		if (unlikely(pmd_trans_unstable(vmf->pmd)))
		return 0;

		skip_pmd_checks:
		/* Use the zero-page for reads */
		if (!(vmf->flags & FAULT_FLAG_WRITE) &&
		!mm_forbids_zeropage(vma->vm_mm)) {
		@@ -3417,6 +3425,10 @@ static vm_fault_t __do_fault(struct vm_fault *vmf)
		struct vm_area_struct *vma = vmf->vma;
		vm_fault_t ret;

		/* Do not check unstable pmd, if it's changed will retry later */
		if (vmf->flags & FAULT_FLAG_SPECULATIVE)
		goto skip_pmd_checks;

		/*
		* Preallocate pte before we take page_lock because this might lead to
		* deadlocks for memcg reclaim which waits for pages under writeback:
		@@ -3439,6 +3451,7 @@ static vm_fault_t __do_fault(struct vm_fault *vmf)
		smp_wmb(); /* See comment in __pte_alloc() */
		}

		skip_pmd_checks:
		ret = vma->vm_ops->fault(vmf);
		if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY |
		VM_FAULT_DONE_COW)))
		@@ -3812,7 +3825,8 @@ static vm_fault_t do_fault_around(struct vm_fault *vmf)
		end_pgoff = min3(end_pgoff, vma_pages(vmf->vma) + vmf->vma->vm_pgoff - 1,
		start_pgoff + nr_pages - 1);

		if (pmd_none(*vmf->pmd)) {
		if (!(vmf->flags & FAULT_FLAG_SPECULATIVE) &&
		pmd_none(*vmf->pmd)) {
		vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm);
		if (!vmf->prealloc_pte)
		goto out;
		@@ -4179,16 +4193,11 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
		pte_t entry;
		vm_fault_t ret = 0;

		if (unlikely(pmd_none(*vmf->pmd))) {
		/*
		* In the case of the speculative page fault handler we abort
		* the speculative path immediately as the pmd is probably
		* in the way to be converted in a huge one. We will try
		* again holding the mmap_sem (which implies that the collapse
		* operation is done).
		*/
		/* Do not check unstable pmd, if it's changed will retry later */
		if (vmf->flags & FAULT_FLAG_SPECULATIVE)
		return VM_FAULT_RETRY;
		goto skip_pmd_checks;

		if (unlikely(pmd_none(*vmf->pmd))) {
		/*
		* Leave __pte_alloc() until later: because vm_ops->fault may
		* want to allocate huge page, and if we expose page table
		@@ -4196,8 +4205,7 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
		* concurrent faults and from rmap lookups.
		*/
		vmf->pte = NULL;
		} else if (!(vmf->flags & FAULT_FLAG_SPECULATIVE)) {
		/* See comment in pte_alloc_one_map() */
		} else {
		if (pmd_devmap_trans_unstable(vmf->pmd))
		return 0;
		/*
		@@ -4227,6 +4235,7 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
		}
		}

		skip_pmd_checks:
		if (!vmf->pte) {
		if (vma_is_anonymous(vmf->vma))
		return do_anonymous_page(vmf);
		@@ -4465,7 +4474,6 @@ int __handle_speculative_fault(struct mm_struct *mm, unsigned long address,
		pol = __get_vma_policy(vmf.vma, address);
		if (!pol)
		pol = get_task_policy(current);
		if (!pol)
		if (pol && pol->mode == MPOL_INTERLEAVE)
		return VM_FAULT_RETRY;
		#endif