Loading include/linux/mm.h +17 −5 Original line number Diff line number Diff line Loading @@ -1411,25 +1411,37 @@ extern int handle_mm_fault(struct vm_area_struct *vma, unsigned long address, #ifdef CONFIG_SPECULATIVE_PAGE_FAULT extern int __handle_speculative_fault(struct mm_struct *mm, unsigned long address, unsigned int flags); unsigned int flags, struct vm_area_struct **vma); static inline int handle_speculative_fault(struct mm_struct *mm, unsigned long address, unsigned int flags) unsigned int flags, struct vm_area_struct **vma) { /* * Try speculative page fault for multithreaded user space task only. */ if (!(flags & FAULT_FLAG_USER) || atomic_read(&mm->mm_users) == 1) if (!(flags & FAULT_FLAG_USER) || atomic_read(&mm->mm_users) == 1) { *vma = NULL; return VM_FAULT_RETRY; return __handle_speculative_fault(mm, address, flags); } return __handle_speculative_fault(mm, address, flags, vma); } extern bool can_reuse_spf_vma(struct vm_area_struct *vma, unsigned long address); #else static inline int handle_speculative_fault(struct mm_struct *mm, unsigned long address, unsigned int flags) unsigned int flags, struct vm_area_struct **vma) { return VM_FAULT_RETRY; } static inline bool can_reuse_spf_vma(struct vm_area_struct *vma, unsigned long address) { return false; } #endif /* CONFIG_SPECULATIVE_PAGE_FAULT */ extern int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm, Loading include/linux/vm_event_item.h +3 −0 Original line number Diff line number Diff line Loading @@ -110,6 +110,9 @@ enum vm_event_item { PGPGIN, PGPGOUT, PGPGOUTCLEAN, PSWPIN, PSWPOUT, #ifdef CONFIG_SWAP SWAP_RA, SWAP_RA_HIT, #endif #ifdef CONFIG_SPECULATIVE_PAGE_FAULT SPECULATIVE_PGFAULT, #endif NR_VM_EVENT_ITEMS }; Loading mm/memory.c +86 −54 Original line number Diff line number Diff line Loading @@ -4270,13 +4270,22 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address, /* This is required by vm_normal_page() */ #error "Speculative page fault handler requires __HAVE_ARCH_PTE_SPECIAL" #endif /* * vm_normal_page() adds some processing which should be done while * hodling the mmap_sem. */ /* * Tries to handle the page fault in a speculative way, without grabbing the * mmap_sem. * When VM_FAULT_RETRY is returned, the vma pointer is valid and this vma must * be checked later when the mmap_sem has been grabbed by calling * can_reuse_spf_vma(). * This is needed as the returned vma is kept in memory until the call to * can_reuse_spf_vma() is made. */ int __handle_speculative_fault(struct mm_struct *mm, unsigned long address, unsigned int flags) unsigned int flags, struct vm_area_struct **vma) { struct vm_fault vmf = { .address = address, Loading @@ -4284,22 +4293,22 @@ int __handle_speculative_fault(struct mm_struct *mm, unsigned long address, pgd_t *pgd, pgdval; p4d_t *p4d, p4dval; pud_t pudval; int seq, ret = VM_FAULT_RETRY; struct vm_area_struct *vma; int seq, ret; /* Clear flags that may lead to release the mmap_sem to retry */ flags &= ~(FAULT_FLAG_ALLOW_RETRY|FAULT_FLAG_KILLABLE); flags |= FAULT_FLAG_SPECULATIVE; vma = get_vma(mm, address); if (!vma) return ret; *vma = get_vma(mm, address); if (!*vma) return VM_FAULT_RETRY; vmf.vma = *vma; /* rmb <-> seqlock,vma_rb_erase() */ seq = raw_read_seqcount(&vma->vm_sequence); seq = raw_read_seqcount(&vmf.vma->vm_sequence); if (seq & 1) { trace_spf_vma_changed(_RET_IP_, vma, address); goto out_put; trace_spf_vma_changed(_RET_IP_, vmf.vma, address); return VM_FAULT_RETRY; } /* Loading @@ -4307,9 +4316,9 @@ int __handle_speculative_fault(struct mm_struct *mm, unsigned long address, * with the VMA. * This include huge page from hugetlbfs. */ if (vma->vm_ops) { trace_spf_vma_notsup(_RET_IP_, vma, address); goto out_put; if (vmf.vma->vm_ops) { trace_spf_vma_notsup(_RET_IP_, vmf.vma, address); return VM_FAULT_RETRY; } /* Loading @@ -4317,18 +4326,18 @@ int __handle_speculative_fault(struct mm_struct *mm, unsigned long address, * because vm_next and vm_prev must be safe. This can't be guaranteed * in the speculative path. */ if (unlikely(!vma->anon_vma)) { trace_spf_vma_notsup(_RET_IP_, vma, address); goto out_put; if (unlikely(!vmf.vma->anon_vma)) { trace_spf_vma_notsup(_RET_IP_, vmf.vma, address); return VM_FAULT_RETRY; } vmf.vma_flags = READ_ONCE(vma->vm_flags); vmf.vma_page_prot = READ_ONCE(vma->vm_page_prot); vmf.vma_flags = READ_ONCE(vmf.vma->vm_flags); vmf.vma_page_prot = READ_ONCE(vmf.vma->vm_page_prot); /* Can't call userland page fault handler in the speculative path */ if (unlikely(vmf.vma_flags & VM_UFFD_MISSING)) { trace_spf_vma_notsup(_RET_IP_, vma, address); goto out_put; trace_spf_vma_notsup(_RET_IP_, vmf.vma, address); return VM_FAULT_RETRY; } if (vmf.vma_flags & VM_GROWSDOWN || vmf.vma_flags & VM_GROWSUP) { Loading @@ -4337,36 +4346,27 @@ int __handle_speculative_fault(struct mm_struct *mm, unsigned long address, * boundaries but we want to trace it as not supported instead * of changed. */ trace_spf_vma_notsup(_RET_IP_, vma, address); goto out_put; trace_spf_vma_notsup(_RET_IP_, vmf.vma, address); return VM_FAULT_RETRY; } if (address < READ_ONCE(vma->vm_start) || READ_ONCE(vma->vm_end) <= address) { trace_spf_vma_changed(_RET_IP_, vma, address); goto out_put; if (address < READ_ONCE(vmf.vma->vm_start) || READ_ONCE(vmf.vma->vm_end) <= address) { trace_spf_vma_changed(_RET_IP_, vmf.vma, address); return VM_FAULT_RETRY; } if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE, if (!arch_vma_access_permitted(vmf.vma, flags & FAULT_FLAG_WRITE, flags & FAULT_FLAG_INSTRUCTION, flags & FAULT_FLAG_REMOTE)) { trace_spf_vma_access(_RET_IP_, vma, address); ret = VM_FAULT_SIGSEGV; goto out_put; } flags & FAULT_FLAG_REMOTE)) goto out_segv; /* This is one is required to check that the VMA has write access set */ if (flags & FAULT_FLAG_WRITE) { if (unlikely(!(vmf.vma_flags & VM_WRITE))) { trace_spf_vma_access(_RET_IP_, vma, address); ret = VM_FAULT_SIGSEGV; goto out_put; } } else if (unlikely(!(vmf.vma_flags & (VM_READ|VM_EXEC|VM_WRITE)))) { trace_spf_vma_access(_RET_IP_, vma, address); ret = VM_FAULT_SIGSEGV; goto out_put; } if (unlikely(!(vmf.vma_flags & VM_WRITE))) goto out_segv; } else if (unlikely(!(vmf.vma_flags & (VM_READ|VM_EXEC|VM_WRITE)))) goto out_segv; #ifdef CONFIG_NUMA struct mempolicy *pol; Loading @@ -4376,13 +4376,13 @@ int __handle_speculative_fault(struct mm_struct *mm, unsigned long address, * mpol_misplaced() which are not compatible with the *speculative page fault processing. */ pol = __get_vma_policy(vma, address); pol = __get_vma_policy(vmf.vma, address); if (!pol) pol = get_task_policy(current); if (!pol) if (pol && pol->mode == MPOL_INTERLEAVE) { trace_spf_vma_notsup(_RET_IP_, vma, address); goto out_put; trace_spf_vma_notsup(_RET_IP_, vmf.vma, address); return VM_FAULT_RETRY; } #endif Loading Loading @@ -4444,9 +4444,8 @@ int __handle_speculative_fault(struct mm_struct *mm, unsigned long address, vmf.pte = NULL; } vmf.vma = vma; vmf.pgoff = linear_page_index(vma, address); vmf.gfp_mask = __get_fault_gfp_mask(vma); vmf.pgoff = linear_page_index(vmf.vma, address); vmf.gfp_mask = __get_fault_gfp_mask(vmf.vma); vmf.sequence = seq; vmf.flags = flags; Loading @@ -4456,16 +4455,23 @@ int __handle_speculative_fault(struct mm_struct *mm, unsigned long address, * We need to re-validate the VMA after checking the bounds, otherwise * we might have a false positive on the bounds. */ if (read_seqcount_retry(&vma->vm_sequence, seq)) { trace_spf_vma_changed(_RET_IP_, vma, address); goto out_put; if (read_seqcount_retry(&vmf.vma->vm_sequence, seq)) { trace_spf_vma_changed(_RET_IP_, vmf.vma, address); return VM_FAULT_RETRY; } mem_cgroup_oom_enable(); ret = handle_pte_fault(&vmf); mem_cgroup_oom_disable(); put_vma(vma); /* * If there is no need to retry, don't return the vma to the caller. */ if (ret != VM_FAULT_RETRY) { count_vm_event(SPECULATIVE_PGFAULT); put_vma(vmf.vma); *vma = NULL; } /* * The task may have entered a memcg OOM situation but Loading @@ -4478,9 +4484,35 @@ int __handle_speculative_fault(struct mm_struct *mm, unsigned long address, return ret; out_walk: trace_spf_vma_notsup(_RET_IP_, vma, address); trace_spf_vma_notsup(_RET_IP_, vmf.vma, address); local_irq_enable(); out_put: return VM_FAULT_RETRY; out_segv: trace_spf_vma_access(_RET_IP_, vmf.vma, address); /* * We don't return VM_FAULT_RETRY so the caller is not expected to * retrieve the fetched VMA. */ put_vma(vmf.vma); *vma = NULL; return VM_FAULT_SIGSEGV; } /* * This is used to know if the vma fetch in the speculative page fault handler * is still valid when trying the regular fault path while holding the * mmap_sem. * The call to put_vma(vma) must be made after checking the vma's fields, as * the vma may be freed by put_vma(). In such a case it is expected that false * is returned. */ bool can_reuse_spf_vma(struct vm_area_struct *vma, unsigned long address) { bool ret; ret = !RB_EMPTY_NODE(&vma->vm_rb) && vma->vm_start <= address && address < vma->vm_end; put_vma(vma); return ret; } Loading mm/vmstat.c +4 −1 Original line number Diff line number Diff line Loading @@ -1219,7 +1219,10 @@ const char * const vmstat_text[] = { "swap_ra", "swap_ra_hit", #endif #endif /* CONFIG_VM_EVENTS_COUNTERS */ #ifdef CONFIG_SPECULATIVE_PAGE_FAULT "speculative_pgfault" #endif #endif /* CONFIG_VM_EVENT_COUNTERS */ }; #endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA */ Loading Loading
include/linux/mm.h +17 −5 Original line number Diff line number Diff line Loading @@ -1411,25 +1411,37 @@ extern int handle_mm_fault(struct vm_area_struct *vma, unsigned long address, #ifdef CONFIG_SPECULATIVE_PAGE_FAULT extern int __handle_speculative_fault(struct mm_struct *mm, unsigned long address, unsigned int flags); unsigned int flags, struct vm_area_struct **vma); static inline int handle_speculative_fault(struct mm_struct *mm, unsigned long address, unsigned int flags) unsigned int flags, struct vm_area_struct **vma) { /* * Try speculative page fault for multithreaded user space task only. */ if (!(flags & FAULT_FLAG_USER) || atomic_read(&mm->mm_users) == 1) if (!(flags & FAULT_FLAG_USER) || atomic_read(&mm->mm_users) == 1) { *vma = NULL; return VM_FAULT_RETRY; return __handle_speculative_fault(mm, address, flags); } return __handle_speculative_fault(mm, address, flags, vma); } extern bool can_reuse_spf_vma(struct vm_area_struct *vma, unsigned long address); #else static inline int handle_speculative_fault(struct mm_struct *mm, unsigned long address, unsigned int flags) unsigned int flags, struct vm_area_struct **vma) { return VM_FAULT_RETRY; } static inline bool can_reuse_spf_vma(struct vm_area_struct *vma, unsigned long address) { return false; } #endif /* CONFIG_SPECULATIVE_PAGE_FAULT */ extern int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm, Loading
include/linux/vm_event_item.h +3 −0 Original line number Diff line number Diff line Loading @@ -110,6 +110,9 @@ enum vm_event_item { PGPGIN, PGPGOUT, PGPGOUTCLEAN, PSWPIN, PSWPOUT, #ifdef CONFIG_SWAP SWAP_RA, SWAP_RA_HIT, #endif #ifdef CONFIG_SPECULATIVE_PAGE_FAULT SPECULATIVE_PGFAULT, #endif NR_VM_EVENT_ITEMS }; Loading
mm/memory.c +86 −54 Original line number Diff line number Diff line Loading @@ -4270,13 +4270,22 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address, /* This is required by vm_normal_page() */ #error "Speculative page fault handler requires __HAVE_ARCH_PTE_SPECIAL" #endif /* * vm_normal_page() adds some processing which should be done while * hodling the mmap_sem. */ /* * Tries to handle the page fault in a speculative way, without grabbing the * mmap_sem. * When VM_FAULT_RETRY is returned, the vma pointer is valid and this vma must * be checked later when the mmap_sem has been grabbed by calling * can_reuse_spf_vma(). * This is needed as the returned vma is kept in memory until the call to * can_reuse_spf_vma() is made. */ int __handle_speculative_fault(struct mm_struct *mm, unsigned long address, unsigned int flags) unsigned int flags, struct vm_area_struct **vma) { struct vm_fault vmf = { .address = address, Loading @@ -4284,22 +4293,22 @@ int __handle_speculative_fault(struct mm_struct *mm, unsigned long address, pgd_t *pgd, pgdval; p4d_t *p4d, p4dval; pud_t pudval; int seq, ret = VM_FAULT_RETRY; struct vm_area_struct *vma; int seq, ret; /* Clear flags that may lead to release the mmap_sem to retry */ flags &= ~(FAULT_FLAG_ALLOW_RETRY|FAULT_FLAG_KILLABLE); flags |= FAULT_FLAG_SPECULATIVE; vma = get_vma(mm, address); if (!vma) return ret; *vma = get_vma(mm, address); if (!*vma) return VM_FAULT_RETRY; vmf.vma = *vma; /* rmb <-> seqlock,vma_rb_erase() */ seq = raw_read_seqcount(&vma->vm_sequence); seq = raw_read_seqcount(&vmf.vma->vm_sequence); if (seq & 1) { trace_spf_vma_changed(_RET_IP_, vma, address); goto out_put; trace_spf_vma_changed(_RET_IP_, vmf.vma, address); return VM_FAULT_RETRY; } /* Loading @@ -4307,9 +4316,9 @@ int __handle_speculative_fault(struct mm_struct *mm, unsigned long address, * with the VMA. * This include huge page from hugetlbfs. */ if (vma->vm_ops) { trace_spf_vma_notsup(_RET_IP_, vma, address); goto out_put; if (vmf.vma->vm_ops) { trace_spf_vma_notsup(_RET_IP_, vmf.vma, address); return VM_FAULT_RETRY; } /* Loading @@ -4317,18 +4326,18 @@ int __handle_speculative_fault(struct mm_struct *mm, unsigned long address, * because vm_next and vm_prev must be safe. This can't be guaranteed * in the speculative path. */ if (unlikely(!vma->anon_vma)) { trace_spf_vma_notsup(_RET_IP_, vma, address); goto out_put; if (unlikely(!vmf.vma->anon_vma)) { trace_spf_vma_notsup(_RET_IP_, vmf.vma, address); return VM_FAULT_RETRY; } vmf.vma_flags = READ_ONCE(vma->vm_flags); vmf.vma_page_prot = READ_ONCE(vma->vm_page_prot); vmf.vma_flags = READ_ONCE(vmf.vma->vm_flags); vmf.vma_page_prot = READ_ONCE(vmf.vma->vm_page_prot); /* Can't call userland page fault handler in the speculative path */ if (unlikely(vmf.vma_flags & VM_UFFD_MISSING)) { trace_spf_vma_notsup(_RET_IP_, vma, address); goto out_put; trace_spf_vma_notsup(_RET_IP_, vmf.vma, address); return VM_FAULT_RETRY; } if (vmf.vma_flags & VM_GROWSDOWN || vmf.vma_flags & VM_GROWSUP) { Loading @@ -4337,36 +4346,27 @@ int __handle_speculative_fault(struct mm_struct *mm, unsigned long address, * boundaries but we want to trace it as not supported instead * of changed. */ trace_spf_vma_notsup(_RET_IP_, vma, address); goto out_put; trace_spf_vma_notsup(_RET_IP_, vmf.vma, address); return VM_FAULT_RETRY; } if (address < READ_ONCE(vma->vm_start) || READ_ONCE(vma->vm_end) <= address) { trace_spf_vma_changed(_RET_IP_, vma, address); goto out_put; if (address < READ_ONCE(vmf.vma->vm_start) || READ_ONCE(vmf.vma->vm_end) <= address) { trace_spf_vma_changed(_RET_IP_, vmf.vma, address); return VM_FAULT_RETRY; } if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE, if (!arch_vma_access_permitted(vmf.vma, flags & FAULT_FLAG_WRITE, flags & FAULT_FLAG_INSTRUCTION, flags & FAULT_FLAG_REMOTE)) { trace_spf_vma_access(_RET_IP_, vma, address); ret = VM_FAULT_SIGSEGV; goto out_put; } flags & FAULT_FLAG_REMOTE)) goto out_segv; /* This is one is required to check that the VMA has write access set */ if (flags & FAULT_FLAG_WRITE) { if (unlikely(!(vmf.vma_flags & VM_WRITE))) { trace_spf_vma_access(_RET_IP_, vma, address); ret = VM_FAULT_SIGSEGV; goto out_put; } } else if (unlikely(!(vmf.vma_flags & (VM_READ|VM_EXEC|VM_WRITE)))) { trace_spf_vma_access(_RET_IP_, vma, address); ret = VM_FAULT_SIGSEGV; goto out_put; } if (unlikely(!(vmf.vma_flags & VM_WRITE))) goto out_segv; } else if (unlikely(!(vmf.vma_flags & (VM_READ|VM_EXEC|VM_WRITE)))) goto out_segv; #ifdef CONFIG_NUMA struct mempolicy *pol; Loading @@ -4376,13 +4376,13 @@ int __handle_speculative_fault(struct mm_struct *mm, unsigned long address, * mpol_misplaced() which are not compatible with the *speculative page fault processing. */ pol = __get_vma_policy(vma, address); pol = __get_vma_policy(vmf.vma, address); if (!pol) pol = get_task_policy(current); if (!pol) if (pol && pol->mode == MPOL_INTERLEAVE) { trace_spf_vma_notsup(_RET_IP_, vma, address); goto out_put; trace_spf_vma_notsup(_RET_IP_, vmf.vma, address); return VM_FAULT_RETRY; } #endif Loading Loading @@ -4444,9 +4444,8 @@ int __handle_speculative_fault(struct mm_struct *mm, unsigned long address, vmf.pte = NULL; } vmf.vma = vma; vmf.pgoff = linear_page_index(vma, address); vmf.gfp_mask = __get_fault_gfp_mask(vma); vmf.pgoff = linear_page_index(vmf.vma, address); vmf.gfp_mask = __get_fault_gfp_mask(vmf.vma); vmf.sequence = seq; vmf.flags = flags; Loading @@ -4456,16 +4455,23 @@ int __handle_speculative_fault(struct mm_struct *mm, unsigned long address, * We need to re-validate the VMA after checking the bounds, otherwise * we might have a false positive on the bounds. */ if (read_seqcount_retry(&vma->vm_sequence, seq)) { trace_spf_vma_changed(_RET_IP_, vma, address); goto out_put; if (read_seqcount_retry(&vmf.vma->vm_sequence, seq)) { trace_spf_vma_changed(_RET_IP_, vmf.vma, address); return VM_FAULT_RETRY; } mem_cgroup_oom_enable(); ret = handle_pte_fault(&vmf); mem_cgroup_oom_disable(); put_vma(vma); /* * If there is no need to retry, don't return the vma to the caller. */ if (ret != VM_FAULT_RETRY) { count_vm_event(SPECULATIVE_PGFAULT); put_vma(vmf.vma); *vma = NULL; } /* * The task may have entered a memcg OOM situation but Loading @@ -4478,9 +4484,35 @@ int __handle_speculative_fault(struct mm_struct *mm, unsigned long address, return ret; out_walk: trace_spf_vma_notsup(_RET_IP_, vma, address); trace_spf_vma_notsup(_RET_IP_, vmf.vma, address); local_irq_enable(); out_put: return VM_FAULT_RETRY; out_segv: trace_spf_vma_access(_RET_IP_, vmf.vma, address); /* * We don't return VM_FAULT_RETRY so the caller is not expected to * retrieve the fetched VMA. */ put_vma(vmf.vma); *vma = NULL; return VM_FAULT_SIGSEGV; } /* * This is used to know if the vma fetch in the speculative page fault handler * is still valid when trying the regular fault path while holding the * mmap_sem. * The call to put_vma(vma) must be made after checking the vma's fields, as * the vma may be freed by put_vma(). In such a case it is expected that false * is returned. */ bool can_reuse_spf_vma(struct vm_area_struct *vma, unsigned long address) { bool ret; ret = !RB_EMPTY_NODE(&vma->vm_rb) && vma->vm_start <= address && address < vma->vm_end; put_vma(vma); return ret; } Loading
mm/vmstat.c +4 −1 Original line number Diff line number Diff line Loading @@ -1219,7 +1219,10 @@ const char * const vmstat_text[] = { "swap_ra", "swap_ra_hit", #endif #endif /* CONFIG_VM_EVENTS_COUNTERS */ #ifdef CONFIG_SPECULATIVE_PAGE_FAULT "speculative_pgfault" #endif #endif /* CONFIG_VM_EVENT_COUNTERS */ }; #endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA */ Loading