Merge "mm: add speculative page fault vmstats" (75d5a3df) · Commits · e / devices / android_kernel_oneplus_sm8150

include/linux/mm.h

+17 −5

Original line number	Diff line number	Diff line
		@@ -1411,25 +1411,37 @@ extern int handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
		#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
		extern int __handle_speculative_fault(struct mm_struct *mm,
		unsigned long address,
		unsigned int flags);
		unsigned int flags,
		struct vm_area_struct **vma);
		static inline int handle_speculative_fault(struct mm_struct *mm,
		unsigned long address,
		unsigned int flags)
		unsigned int flags,
		struct vm_area_struct **vma)
		{
		/*
		* Try speculative page fault for multithreaded user space task only.
		*/
		if (!(flags & FAULT_FLAG_USER) \|\| atomic_read(&mm->mm_users) == 1)
		if (!(flags & FAULT_FLAG_USER) \|\| atomic_read(&mm->mm_users) == 1) {
		*vma = NULL;
		return VM_FAULT_RETRY;
		return __handle_speculative_fault(mm, address, flags);
		}
		return __handle_speculative_fault(mm, address, flags, vma);
		}
		extern bool can_reuse_spf_vma(struct vm_area_struct *vma,
		unsigned long address);
		#else
		static inline int handle_speculative_fault(struct mm_struct *mm,
		unsigned long address,
		unsigned int flags)
		unsigned int flags,
		struct vm_area_struct **vma)
		{
		return VM_FAULT_RETRY;
		}
		/*
		* Variant built when CONFIG_SPECULATIVE_PAGE_FAULT is not defined:
		* both arguments are ignored and false is always returned.
		*/
		static inline bool can_reuse_spf_vma(struct vm_area_struct *vma,
		unsigned long address)
		{
		return false;
		}
		#endif /* CONFIG_SPECULATIVE_PAGE_FAULT */

		extern int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,

include/linux/vm_event_item.h

+3 −0

Original line number	Diff line number	Diff line
		@@ -110,6 +110,9 @@ enum vm_event_item { PGPGIN, PGPGOUT, PGPGOUTCLEAN, PSWPIN, PSWPOUT,
		#ifdef CONFIG_SWAP
		SWAP_RA,
		SWAP_RA_HIT,
		#endif
		#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
		SPECULATIVE_PGFAULT,
		#endif
		NR_VM_EVENT_ITEMS
		};

mm/memory.c

+86 −54

Original line number	Diff line number	Diff line
		@@ -4270,13 +4270,22 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
		/* This is required by vm_normal_page() */
		#error "Speculative page fault handler requires __HAVE_ARCH_PTE_SPECIAL"
		#endif

		/*
		* vm_normal_page() adds some processing which should be done while
		* holding the mmap_sem.
		*/

		/*
		* Tries to handle the page fault in a speculative way, without grabbing the
		* mmap_sem.
		* When VM_FAULT_RETRY is returned, the vma pointer is valid and this vma must
		* be checked later when the mmap_sem has been grabbed by calling
		* can_reuse_spf_vma().
		* This is needed as the returned vma is kept in memory until the call to
		* can_reuse_spf_vma() is made.
		*/
		int __handle_speculative_fault(struct mm_struct *mm, unsigned long address,
		unsigned int flags)
		unsigned int flags, struct vm_area_struct **vma)
		{
		struct vm_fault vmf = {
		.address = address,
		@@ -4284,22 +4293,22 @@ int __handle_speculative_fault(struct mm_struct *mm, unsigned long address,
		pgd_t *pgd, pgdval;
		p4d_t *p4d, p4dval;
		pud_t pudval;
		int seq, ret = VM_FAULT_RETRY;
		struct vm_area_struct *vma;
		int seq, ret;

		/* Clear flags that may lead to release the mmap_sem to retry */
		flags &= ~(FAULT_FLAG_ALLOW_RETRY\|FAULT_FLAG_KILLABLE);
		flags \|= FAULT_FLAG_SPECULATIVE;

		vma = get_vma(mm, address);
		if (!vma)
		return ret;
		*vma = get_vma(mm, address);
		if (!*vma)
		return VM_FAULT_RETRY;
		vmf.vma = *vma;

		/* rmb <-> seqlock,vma_rb_erase() */
		seq = raw_read_seqcount(&vma->vm_sequence);
		seq = raw_read_seqcount(&vmf.vma->vm_sequence);
		if (seq & 1) {
		trace_spf_vma_changed(_RET_IP_, vma, address);
		goto out_put;
		trace_spf_vma_changed(_RET_IP_, vmf.vma, address);
		return VM_FAULT_RETRY;
		}

		/*
		@@ -4307,9 +4316,9 @@ int __handle_speculative_fault(struct mm_struct *mm, unsigned long address,
		* with the VMA.
		* This includes huge pages from hugetlbfs.
		*/
		if (vma->vm_ops) {
		trace_spf_vma_notsup(_RET_IP_, vma, address);
		goto out_put;
		if (vmf.vma->vm_ops) {
		trace_spf_vma_notsup(_RET_IP_, vmf.vma, address);
		return VM_FAULT_RETRY;
		}

		/*
		@@ -4317,18 +4326,18 @@ int __handle_speculative_fault(struct mm_struct *mm, unsigned long address,
		* because vm_next and vm_prev must be safe. This can't be guaranteed
		* in the speculative path.
		*/
		if (unlikely(!vma->anon_vma)) {
		trace_spf_vma_notsup(_RET_IP_, vma, address);
		goto out_put;
		if (unlikely(!vmf.vma->anon_vma)) {
		trace_spf_vma_notsup(_RET_IP_, vmf.vma, address);
		return VM_FAULT_RETRY;
		}

		vmf.vma_flags = READ_ONCE(vma->vm_flags);
		vmf.vma_page_prot = READ_ONCE(vma->vm_page_prot);
		vmf.vma_flags = READ_ONCE(vmf.vma->vm_flags);
		vmf.vma_page_prot = READ_ONCE(vmf.vma->vm_page_prot);

		/* Can't call userland page fault handler in the speculative path */
		if (unlikely(vmf.vma_flags & VM_UFFD_MISSING)) {
		trace_spf_vma_notsup(_RET_IP_, vma, address);
		goto out_put;
		trace_spf_vma_notsup(_RET_IP_, vmf.vma, address);
		return VM_FAULT_RETRY;
		}

		if (vmf.vma_flags & VM_GROWSDOWN \|\| vmf.vma_flags & VM_GROWSUP) {
		@@ -4337,36 +4346,27 @@ int __handle_speculative_fault(struct mm_struct *mm, unsigned long address,
		* boundaries but we want to trace it as not supported instead
		* of changed.
		*/
		trace_spf_vma_notsup(_RET_IP_, vma, address);
		goto out_put;
		trace_spf_vma_notsup(_RET_IP_, vmf.vma, address);
		return VM_FAULT_RETRY;
		}

		if (address < READ_ONCE(vma->vm_start)
		\|\| READ_ONCE(vma->vm_end) <= address) {
		trace_spf_vma_changed(_RET_IP_, vma, address);
		goto out_put;
		if (address < READ_ONCE(vmf.vma->vm_start)
		\|\| READ_ONCE(vmf.vma->vm_end) <= address) {
		trace_spf_vma_changed(_RET_IP_, vmf.vma, address);
		return VM_FAULT_RETRY;
		}

		if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE,
		if (!arch_vma_access_permitted(vmf.vma, flags & FAULT_FLAG_WRITE,
		flags & FAULT_FLAG_INSTRUCTION,
		flags & FAULT_FLAG_REMOTE)) {
		trace_spf_vma_access(_RET_IP_, vma, address);
		ret = VM_FAULT_SIGSEGV;
		goto out_put;
		}
		flags & FAULT_FLAG_REMOTE))
		goto out_segv;

		/* This one is required to check that the VMA has write access set */
		if (flags & FAULT_FLAG_WRITE) {
		if (unlikely(!(vmf.vma_flags & VM_WRITE))) {
		trace_spf_vma_access(_RET_IP_, vma, address);
		ret = VM_FAULT_SIGSEGV;
		goto out_put;
		}
		} else if (unlikely(!(vmf.vma_flags & (VM_READ\|VM_EXEC\|VM_WRITE)))) {
		trace_spf_vma_access(_RET_IP_, vma, address);
		ret = VM_FAULT_SIGSEGV;
		goto out_put;
		}
		if (unlikely(!(vmf.vma_flags & VM_WRITE)))
		goto out_segv;
		} else if (unlikely(!(vmf.vma_flags & (VM_READ\|VM_EXEC\|VM_WRITE))))
		goto out_segv;

		#ifdef CONFIG_NUMA
		struct mempolicy *pol;
		@@ -4376,13 +4376,13 @@ int __handle_speculative_fault(struct mm_struct *mm, unsigned long address,
		* mpol_misplaced() which are not compatible with the
		* speculative page fault processing.
		*/
		pol = __get_vma_policy(vma, address);
		pol = __get_vma_policy(vmf.vma, address);
		if (!pol)
		pol = get_task_policy(current);
		if (!pol)
		if (pol && pol->mode == MPOL_INTERLEAVE) {
		trace_spf_vma_notsup(_RET_IP_, vma, address);
		goto out_put;
		trace_spf_vma_notsup(_RET_IP_, vmf.vma, address);
		return VM_FAULT_RETRY;
		}
		#endif

		@@ -4444,9 +4444,8 @@ int __handle_speculative_fault(struct mm_struct *mm, unsigned long address,
		vmf.pte = NULL;
		}

		vmf.vma = vma;
		vmf.pgoff = linear_page_index(vma, address);
		vmf.gfp_mask = __get_fault_gfp_mask(vma);
		vmf.pgoff = linear_page_index(vmf.vma, address);
		vmf.gfp_mask = __get_fault_gfp_mask(vmf.vma);
		vmf.sequence = seq;
		vmf.flags = flags;

		@@ -4456,16 +4455,23 @@ int __handle_speculative_fault(struct mm_struct *mm, unsigned long address,
		* We need to re-validate the VMA after checking the bounds, otherwise
		* we might have a false positive on the bounds.
		*/
		if (read_seqcount_retry(&vma->vm_sequence, seq)) {
		trace_spf_vma_changed(_RET_IP_, vma, address);
		goto out_put;
		if (read_seqcount_retry(&vmf.vma->vm_sequence, seq)) {
		trace_spf_vma_changed(_RET_IP_, vmf.vma, address);
		return VM_FAULT_RETRY;
		}

		mem_cgroup_oom_enable();
		ret = handle_pte_fault(&vmf);
		mem_cgroup_oom_disable();

		put_vma(vma);
		/*
		* If there is no need to retry, don't return the vma to the caller.
		*/
		if (ret != VM_FAULT_RETRY) {
		count_vm_event(SPECULATIVE_PGFAULT);
		put_vma(vmf.vma);
		*vma = NULL;
		}

		/*
		* The task may have entered a memcg OOM situation but
		@@ -4478,9 +4484,35 @@ int __handle_speculative_fault(struct mm_struct *mm, unsigned long address,
		return ret;

		out_walk:
		trace_spf_vma_notsup(_RET_IP_, vma, address);
		trace_spf_vma_notsup(_RET_IP_, vmf.vma, address);
		local_irq_enable();
		out_put:
		return VM_FAULT_RETRY;

		out_segv:
		trace_spf_vma_access(_RET_IP_, vmf.vma, address);
		/*
		* We don't return VM_FAULT_RETRY so the caller is not expected to
		* retrieve the fetched VMA.
		*/
		put_vma(vmf.vma);
		*vma = NULL;
		return VM_FAULT_SIGSEGV;
		}

		/*
		 * Tell the regular fault path, which runs with the mmap_sem held, whether
		 * the vma fetched earlier by the speculative page fault handler can still
		 * be used for @address.
		 *
		 * The vma is reusable only if it is still linked in the rb tree and its
		 * [vm_start, vm_end) range still covers @address.
		 *
		 * The vma's fields are inspected before put_vma() is called, because
		 * put_vma() may free the vma; false is the expected answer in that case.
		 */
		bool can_reuse_spf_vma(struct vm_area_struct *vma, unsigned long address)
		{
			bool reusable = false;

			/* Only look at the bounds when the vma is still in the rb tree. */
			if (!RB_EMPTY_NODE(&vma->vm_rb))
				reusable = vma->vm_start <= address &&
					   address < vma->vm_end;

			/* Release the vma only once all of its fields have been read. */
			put_vma(vma);

			return reusable;
		}

mm/vmstat.c

+4 −1

Original line number	Diff line number	Diff line
		@@ -1219,7 +1219,10 @@ const char * const vmstat_text[] = {
		"swap_ra",
		"swap_ra_hit",
		#endif
		#endif /* CONFIG_VM_EVENTS_COUNTERS */
		#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
		"speculative_pgfault"
		#endif
		#endif /* CONFIG_VM_EVENT_COUNTERS */
		};
		#endif /* CONFIG_PROC_FS \|\| CONFIG_SYSFS \|\| CONFIG_NUMA */