Loading include/linux/mm.h +6 −0 Original line number Diff line number Diff line Loading @@ -1542,6 +1542,12 @@ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, #ifdef CONFIG_SPECULATIVE_PAGE_FAULT static inline void vm_write_begin(struct vm_area_struct *vma) { /* * Isolated vma might be freed without exclusive mmap_lock but * speculative page fault handler still needs to know it was changed. */ if (!RB_EMPTY_NODE(&vma->vm_rb)) WARN_ON_ONCE(!rwsem_is_locked(&(vma->vm_mm)->mmap_sem)); /* * The reads never spins and preemption * disablement is not required. Loading mm/filemap.c +49 −1 Original line number Diff line number Diff line Loading @@ -2495,7 +2495,9 @@ static struct file *do_async_mmap_readahead(struct vm_fault *vmf, * it in the page cache, and handles the special cases reasonably without * having a lot of duplicated code. * * vma->vm_mm->mmap_sem must be held on entry (except FAULT_FLAG_SPECULATIVE). * If FAULT_FLAG_SPECULATIVE is set, this function runs with elevated vma * refcount and with mmap lock not held. * Otherwise, vma->vm_mm->mmap_sem must be held on entry. * * If our return value has VM_FAULT_RETRY set, it's because the mmap_sem * may be dropped before doing I/O or by lock_page_maybe_drop_mmap(). Loading @@ -2520,6 +2522,52 @@ vm_fault_t filemap_fault(struct vm_fault *vmf) struct page *page; vm_fault_t ret = 0; if (vmf->flags & FAULT_FLAG_SPECULATIVE) { page = find_get_page(mapping, offset); if (unlikely(!page)) return VM_FAULT_RETRY; if (unlikely(PageReadahead(page))) goto page_put; if (!trylock_page(page)) goto page_put; if (unlikely(compound_head(page)->mapping != mapping)) goto page_unlock; VM_BUG_ON_PAGE(page_to_pgoff(page) != offset, page); if (unlikely(!PageUptodate(page))) goto page_unlock; max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); if (unlikely(offset >= max_off)) goto page_unlock; /* * Update readahead mmap_miss statistic. * * Note that we are not sure if finish_fault() will * manage to complete the transaction. If it fails, * we'll come back to filemap_fault() non-speculative * case which will update mmap_miss a second time. * This is not ideal, we would prefer to guarantee the * update will happen exactly once. */ if (!(vmf->vma->vm_flags & VM_RAND_READ) && ra->ra_pages) { unsigned int mmap_miss = READ_ONCE(ra->mmap_miss); if (mmap_miss) WRITE_ONCE(ra->mmap_miss, --mmap_miss); } vmf->page = page; return VM_FAULT_LOCKED; page_unlock: unlock_page(page); page_put: put_page(page); return VM_FAULT_RETRY; } max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); if (unlikely(offset >= max_off)) return VM_FAULT_SIGBUS; Loading mm/khugepaged.c +5 −0 Original line number Diff line number Diff line Loading @@ -1343,6 +1343,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr) if (!pmd) goto drop_hpage; vm_write_begin(vma); start_pte = pte_offset_map_lock(mm, pmd, haddr, &ptl); /* step 1: check all mapped PTEs are to the right huge page */ Loading Loading @@ -1392,6 +1393,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr) ptl = pmd_lock(vma->vm_mm, pmd); _pmd = pmdp_collapse_flush(vma, haddr, pmd); spin_unlock(ptl); vm_write_end(vma); mm_dec_nr_ptes(mm); pte_free(mm, pmd_pgtable(_pmd)); Loading @@ -1402,6 +1404,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr) abort: pte_unmap_unlock(start_pte, ptl); vm_write_end(vma); goto drop_hpage; } Loading Loading @@ -1473,10 +1476,12 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff) */ if (down_write_trylock(&mm->mmap_sem)) { if (!khugepaged_test_exit(mm)) { vm_write_begin(vma); spinlock_t *ptl = pmd_lock(mm, pmd); /* assume page table is clear */ _pmd = pmdp_collapse_flush(vma, addr, pmd); spin_unlock(ptl); vm_write_end(vma); mm_dec_nr_ptes(mm); pte_free(mm, pmd_pgtable(_pmd)); } Loading mm/madvise.c +0 −6 Original line number Diff line number Diff line Loading @@ -500,11 +500,9 @@ static void madvise_cold_page_range(struct mmu_gather *tlb, .target_task = task, }; vm_write_begin(vma); tlb_start_vma(tlb, vma); walk_page_range(vma->vm_mm, addr, end, &cold_walk_ops, &walk_private); tlb_end_vma(tlb, vma); vm_write_end(vma); } static long madvise_cold(struct task_struct *task, Loading Loading @@ -538,11 +536,9 @@ static void madvise_pageout_page_range(struct mmu_gather *tlb, .target_task = task, }; vm_write_begin(vma); tlb_start_vma(tlb, vma); walk_page_range(vma->vm_mm, addr, end, &cold_walk_ops, &walk_private); tlb_end_vma(tlb, vma); vm_write_end(vma); } static inline bool can_do_pageout(struct vm_area_struct *vma) Loading Loading @@ -745,12 +741,10 @@ static int madvise_free_single_vma(struct vm_area_struct *vma, update_hiwater_rss(mm); mmu_notifier_invalidate_range_start(&range); vm_write_begin(vma); tlb_start_vma(&tlb, vma); walk_page_range(vma->vm_mm, range.start, range.end, &madvise_free_walk_ops, &tlb); tlb_end_vma(&tlb, vma); vm_write_end(vma); mmu_notifier_invalidate_range_end(&range); tlb_finish_mmu(&tlb, range.start, range.end); Loading mm/memory.c +25 −17 Original line number Diff line number Diff line Loading @@ -1292,7 +1292,6 @@ void unmap_page_range(struct mmu_gather *tlb, unsigned long next; BUG_ON(addr >= end); vm_write_begin(vma); tlb_start_vma(tlb, vma); pgd = pgd_offset(vma->vm_mm, addr); do { Loading @@ -1302,7 +1301,6 @@ void unmap_page_range(struct mmu_gather *tlb, next = zap_p4d_range(tlb, vma, pgd, addr, next, details); } while (pgd++, addr = next, addr != end); tlb_end_vma(tlb, vma); vm_write_end(vma); } Loading Loading @@ -3050,6 +3048,11 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) int exclusive = 0; vm_fault_t ret; if (vmf->flags & FAULT_FLAG_SPECULATIVE) { pte_unmap(vmf->pte); return VM_FAULT_RETRY; } ret = pte_unmap_same(vmf); if (ret) { /* Loading Loading @@ -3296,6 +3299,10 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf) if (vmf->vma_flags & VM_SHARED) return VM_FAULT_SIGBUS; /* Do not check unstable pmd, if it's changed will retry later */ if (vmf->flags & FAULT_FLAG_SPECULATIVE) goto skip_pmd_checks; /* * Use pte_alloc() instead of pte_alloc_map(). We can't run * pte_offset_map() on pmds where a huge pmd might be created Loading @@ -3313,6 +3320,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf) if (unlikely(pmd_trans_unstable(vmf->pmd))) return 0; skip_pmd_checks: /* Use the zero-page for reads */ if (!(vmf->flags & FAULT_FLAG_WRITE) && !mm_forbids_zeropage(vma->vm_mm)) { Loading Loading @@ -3417,6 +3425,10 @@ static vm_fault_t __do_fault(struct vm_fault *vmf) struct vm_area_struct *vma = vmf->vma; vm_fault_t ret; /* Do not check unstable pmd, if it's changed will retry later */ if (vmf->flags & FAULT_FLAG_SPECULATIVE) goto skip_pmd_checks; /* * Preallocate pte before we take page_lock because this might lead to * deadlocks for memcg reclaim which waits for pages under writeback: Loading @@ -3439,6 +3451,7 @@ static vm_fault_t __do_fault(struct vm_fault *vmf) smp_wmb(); /* See comment in __pte_alloc() */ } skip_pmd_checks: ret = vma->vm_ops->fault(vmf); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY | VM_FAULT_DONE_COW))) Loading Loading @@ -3812,7 +3825,8 @@ static vm_fault_t do_fault_around(struct vm_fault *vmf) end_pgoff = min3(end_pgoff, vma_pages(vmf->vma) + vmf->vma->vm_pgoff - 1, start_pgoff + nr_pages - 1); if (pmd_none(*vmf->pmd)) { if (!(vmf->flags & FAULT_FLAG_SPECULATIVE) && pmd_none(*vmf->pmd)) { vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm); if (!vmf->prealloc_pte) goto out; Loading Loading @@ -4179,16 +4193,11 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf) pte_t entry; vm_fault_t ret = 0; if (unlikely(pmd_none(*vmf->pmd))) { /* * In the case of the speculative page fault handler we abort * the speculative path immediately as the pmd is probably * in the way to be converted in a huge one. We will try * again holding the mmap_sem (which implies that the collapse * operation is done). */ /* Do not check unstable pmd, if it's changed will retry later */ if (vmf->flags & FAULT_FLAG_SPECULATIVE) return VM_FAULT_RETRY; goto skip_pmd_checks; if (unlikely(pmd_none(*vmf->pmd))) { /* * Leave __pte_alloc() until later: because vm_ops->fault may * want to allocate huge page, and if we expose page table Loading @@ -4196,8 +4205,7 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf) * concurrent faults and from rmap lookups. */ vmf->pte = NULL; } else if (!(vmf->flags & FAULT_FLAG_SPECULATIVE)) { /* See comment in pte_alloc_one_map() */ } else { if (pmd_devmap_trans_unstable(vmf->pmd)) return 0; /* Loading Loading @@ -4227,6 +4235,7 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf) } } skip_pmd_checks: if (!vmf->pte) { if (vma_is_anonymous(vmf->vma)) return do_anonymous_page(vmf); Loading Loading @@ -4465,7 +4474,6 @@ int __handle_speculative_fault(struct mm_struct *mm, unsigned long address, pol = __get_vma_policy(vmf.vma, address); if (!pol) pol = get_task_policy(current); if (!pol) if (pol && pol->mode == MPOL_INTERLEAVE) return VM_FAULT_RETRY; #endif Loading Loading
include/linux/mm.h +6 −0 Original line number Diff line number Diff line Loading @@ -1542,6 +1542,12 @@ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, #ifdef CONFIG_SPECULATIVE_PAGE_FAULT static inline void vm_write_begin(struct vm_area_struct *vma) { /* * Isolated vma might be freed without exclusive mmap_lock but * speculative page fault handler still needs to know it was changed. */ if (!RB_EMPTY_NODE(&vma->vm_rb)) WARN_ON_ONCE(!rwsem_is_locked(&(vma->vm_mm)->mmap_sem)); /* * The reads never spins and preemption * disablement is not required. Loading
mm/filemap.c +49 −1 Original line number Diff line number Diff line Loading @@ -2495,7 +2495,9 @@ static struct file *do_async_mmap_readahead(struct vm_fault *vmf, * it in the page cache, and handles the special cases reasonably without * having a lot of duplicated code. * * vma->vm_mm->mmap_sem must be held on entry (except FAULT_FLAG_SPECULATIVE). * If FAULT_FLAG_SPECULATIVE is set, this function runs with elevated vma * refcount and with mmap lock not held. * Otherwise, vma->vm_mm->mmap_sem must be held on entry. * * If our return value has VM_FAULT_RETRY set, it's because the mmap_sem * may be dropped before doing I/O or by lock_page_maybe_drop_mmap(). Loading @@ -2520,6 +2522,52 @@ vm_fault_t filemap_fault(struct vm_fault *vmf) struct page *page; vm_fault_t ret = 0; if (vmf->flags & FAULT_FLAG_SPECULATIVE) { page = find_get_page(mapping, offset); if (unlikely(!page)) return VM_FAULT_RETRY; if (unlikely(PageReadahead(page))) goto page_put; if (!trylock_page(page)) goto page_put; if (unlikely(compound_head(page)->mapping != mapping)) goto page_unlock; VM_BUG_ON_PAGE(page_to_pgoff(page) != offset, page); if (unlikely(!PageUptodate(page))) goto page_unlock; max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); if (unlikely(offset >= max_off)) goto page_unlock; /* * Update readahead mmap_miss statistic. * * Note that we are not sure if finish_fault() will * manage to complete the transaction. If it fails, * we'll come back to filemap_fault() non-speculative * case which will update mmap_miss a second time. * This is not ideal, we would prefer to guarantee the * update will happen exactly once. */ if (!(vmf->vma->vm_flags & VM_RAND_READ) && ra->ra_pages) { unsigned int mmap_miss = READ_ONCE(ra->mmap_miss); if (mmap_miss) WRITE_ONCE(ra->mmap_miss, --mmap_miss); } vmf->page = page; return VM_FAULT_LOCKED; page_unlock: unlock_page(page); page_put: put_page(page); return VM_FAULT_RETRY; } max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); if (unlikely(offset >= max_off)) return VM_FAULT_SIGBUS; Loading
mm/khugepaged.c +5 −0 Original line number Diff line number Diff line Loading @@ -1343,6 +1343,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr) if (!pmd) goto drop_hpage; vm_write_begin(vma); start_pte = pte_offset_map_lock(mm, pmd, haddr, &ptl); /* step 1: check all mapped PTEs are to the right huge page */ Loading Loading @@ -1392,6 +1393,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr) ptl = pmd_lock(vma->vm_mm, pmd); _pmd = pmdp_collapse_flush(vma, haddr, pmd); spin_unlock(ptl); vm_write_end(vma); mm_dec_nr_ptes(mm); pte_free(mm, pmd_pgtable(_pmd)); Loading @@ -1402,6 +1404,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr) abort: pte_unmap_unlock(start_pte, ptl); vm_write_end(vma); goto drop_hpage; } Loading Loading @@ -1473,10 +1476,12 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff) */ if (down_write_trylock(&mm->mmap_sem)) { if (!khugepaged_test_exit(mm)) { vm_write_begin(vma); spinlock_t *ptl = pmd_lock(mm, pmd); /* assume page table is clear */ _pmd = pmdp_collapse_flush(vma, addr, pmd); spin_unlock(ptl); vm_write_end(vma); mm_dec_nr_ptes(mm); pte_free(mm, pmd_pgtable(_pmd)); } Loading
mm/madvise.c +0 −6 Original line number Diff line number Diff line Loading @@ -500,11 +500,9 @@ static void madvise_cold_page_range(struct mmu_gather *tlb, .target_task = task, }; vm_write_begin(vma); tlb_start_vma(tlb, vma); walk_page_range(vma->vm_mm, addr, end, &cold_walk_ops, &walk_private); tlb_end_vma(tlb, vma); vm_write_end(vma); } static long madvise_cold(struct task_struct *task, Loading Loading @@ -538,11 +536,9 @@ static void madvise_pageout_page_range(struct mmu_gather *tlb, .target_task = task, }; vm_write_begin(vma); tlb_start_vma(tlb, vma); walk_page_range(vma->vm_mm, addr, end, &cold_walk_ops, &walk_private); tlb_end_vma(tlb, vma); vm_write_end(vma); } static inline bool can_do_pageout(struct vm_area_struct *vma) Loading Loading @@ -745,12 +741,10 @@ static int madvise_free_single_vma(struct vm_area_struct *vma, update_hiwater_rss(mm); mmu_notifier_invalidate_range_start(&range); vm_write_begin(vma); tlb_start_vma(&tlb, vma); walk_page_range(vma->vm_mm, range.start, range.end, &madvise_free_walk_ops, &tlb); tlb_end_vma(&tlb, vma); vm_write_end(vma); mmu_notifier_invalidate_range_end(&range); tlb_finish_mmu(&tlb, range.start, range.end); Loading
mm/memory.c +25 −17 Original line number Diff line number Diff line Loading @@ -1292,7 +1292,6 @@ void unmap_page_range(struct mmu_gather *tlb, unsigned long next; BUG_ON(addr >= end); vm_write_begin(vma); tlb_start_vma(tlb, vma); pgd = pgd_offset(vma->vm_mm, addr); do { Loading @@ -1302,7 +1301,6 @@ void unmap_page_range(struct mmu_gather *tlb, next = zap_p4d_range(tlb, vma, pgd, addr, next, details); } while (pgd++, addr = next, addr != end); tlb_end_vma(tlb, vma); vm_write_end(vma); } Loading Loading @@ -3050,6 +3048,11 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) int exclusive = 0; vm_fault_t ret; if (vmf->flags & FAULT_FLAG_SPECULATIVE) { pte_unmap(vmf->pte); return VM_FAULT_RETRY; } ret = pte_unmap_same(vmf); if (ret) { /* Loading Loading @@ -3296,6 +3299,10 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf) if (vmf->vma_flags & VM_SHARED) return VM_FAULT_SIGBUS; /* Do not check unstable pmd, if it's changed will retry later */ if (vmf->flags & FAULT_FLAG_SPECULATIVE) goto skip_pmd_checks; /* * Use pte_alloc() instead of pte_alloc_map(). We can't run * pte_offset_map() on pmds where a huge pmd might be created Loading @@ -3313,6 +3320,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf) if (unlikely(pmd_trans_unstable(vmf->pmd))) return 0; skip_pmd_checks: /* Use the zero-page for reads */ if (!(vmf->flags & FAULT_FLAG_WRITE) && !mm_forbids_zeropage(vma->vm_mm)) { Loading Loading @@ -3417,6 +3425,10 @@ static vm_fault_t __do_fault(struct vm_fault *vmf) struct vm_area_struct *vma = vmf->vma; vm_fault_t ret; /* Do not check unstable pmd, if it's changed will retry later */ if (vmf->flags & FAULT_FLAG_SPECULATIVE) goto skip_pmd_checks; /* * Preallocate pte before we take page_lock because this might lead to * deadlocks for memcg reclaim which waits for pages under writeback: Loading @@ -3439,6 +3451,7 @@ static vm_fault_t __do_fault(struct vm_fault *vmf) smp_wmb(); /* See comment in __pte_alloc() */ } skip_pmd_checks: ret = vma->vm_ops->fault(vmf); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY | VM_FAULT_DONE_COW))) Loading Loading @@ -3812,7 +3825,8 @@ static vm_fault_t do_fault_around(struct vm_fault *vmf) end_pgoff = min3(end_pgoff, vma_pages(vmf->vma) + vmf->vma->vm_pgoff - 1, start_pgoff + nr_pages - 1); if (pmd_none(*vmf->pmd)) { if (!(vmf->flags & FAULT_FLAG_SPECULATIVE) && pmd_none(*vmf->pmd)) { vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm); if (!vmf->prealloc_pte) goto out; Loading Loading @@ -4179,16 +4193,11 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf) pte_t entry; vm_fault_t ret = 0; if (unlikely(pmd_none(*vmf->pmd))) { /* * In the case of the speculative page fault handler we abort * the speculative path immediately as the pmd is probably * in the way to be converted in a huge one. We will try * again holding the mmap_sem (which implies that the collapse * operation is done). */ /* Do not check unstable pmd, if it's changed will retry later */ if (vmf->flags & FAULT_FLAG_SPECULATIVE) return VM_FAULT_RETRY; goto skip_pmd_checks; if (unlikely(pmd_none(*vmf->pmd))) { /* * Leave __pte_alloc() until later: because vm_ops->fault may * want to allocate huge page, and if we expose page table Loading @@ -4196,8 +4205,7 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf) * concurrent faults and from rmap lookups. */ vmf->pte = NULL; } else if (!(vmf->flags & FAULT_FLAG_SPECULATIVE)) { /* See comment in pte_alloc_one_map() */ } else { if (pmd_devmap_trans_unstable(vmf->pmd)) return 0; /* Loading Loading @@ -4227,6 +4235,7 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf) } } skip_pmd_checks: if (!vmf->pte) { if (vma_is_anonymous(vmf->vma)) return do_anonymous_page(vmf); Loading Loading @@ -4465,7 +4474,6 @@ int __handle_speculative_fault(struct mm_struct *mm, unsigned long address, pol = __get_vma_policy(vmf.vma, address); if (!pol) pol = get_task_policy(current); if (!pol) if (pol && pol->mode == MPOL_INTERLEAVE) return VM_FAULT_RETRY; #endif Loading