mm: avoid taking rmap locks in move_ptes() (38a76013) · Commits · e / devices / android_kernel_oneplus_sm8150

fs/exec.c

+1 −1

Original line number	Diff line number	Diff line
		@@ -603,7 +603,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
		* process cleanup to remove whatever mess we made.
		*/
		if (length != move_page_tables(vma, old_start,
		vma, new_start, length))
		vma, new_start, length, false))
		return -ENOMEM;

		lru_add_drain();

include/linux/mm.h

+4 −2

Original line number	Diff line number	Diff line
		@@ -1060,7 +1060,8 @@ vm_is_stack(struct task_struct *task, struct vm_area_struct *vma, int in_group);

		extern unsigned long move_page_tables(struct vm_area_struct *vma,
		unsigned long old_addr, struct vm_area_struct *new_vma,
		unsigned long new_addr, unsigned long len);
		unsigned long new_addr, unsigned long len,
		bool need_rmap_locks);
		extern unsigned long do_mremap(unsigned long addr,
		unsigned long old_len, unsigned long new_len,
		unsigned long flags, unsigned long new_addr);
		@@ -1410,7 +1411,8 @@ extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *,
		struct rb_node **, struct rb_node *);
		extern void unlink_file_vma(struct vm_area_struct *);
		extern struct vm_area_struct *copy_vma(struct vm_area_struct **,
		unsigned long addr, unsigned long len, pgoff_t pgoff);
		unsigned long addr, unsigned long len, pgoff_t pgoff,
		bool *need_rmap_locks);
		extern void exit_mmap(struct mm_struct *);

		extern int mm_take_all_locks(struct mm_struct *mm);

mm/mmap.c

+5 −2

Original line number	Diff line number	Diff line
		@@ -2371,7 +2371,8 @@ int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
		* prior to moving page table entries, to effect an mremap move.
		*/
		struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
		unsigned long addr, unsigned long len, pgoff_t pgoff)
		unsigned long addr, unsigned long len, pgoff_t pgoff,
		bool *need_rmap_locks)
		{
		struct vm_area_struct *vma = *vmap;
		unsigned long vma_start = vma->vm_start;
		@@ -2413,8 +2414,9 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
		* linear if there are no pages mapped yet.
		*/
		VM_BUG_ON(faulted_in_anon_vma);
		*vmap = new_vma;
		*vmap = vma = new_vma;
		}
		*need_rmap_locks = (new_vma->vm_pgoff <= vma->vm_pgoff);
		} else {
		new_vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
		if (new_vma) {
		@@ -2434,6 +2436,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
		if (new_vma->vm_ops && new_vma->vm_ops->open)
		new_vma->vm_ops->open(new_vma);
		vma_link(mm, new_vma, prev, rb_link, rb_parent);
		*need_rmap_locks = false;
		}
		}
		return new_vma;

mm/mremap.c

+39 −18

Original line number	Diff line number	Diff line
		@@ -71,26 +71,42 @@ static pmd_t *alloc_new_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
		static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
		unsigned long old_addr, unsigned long old_end,
		struct vm_area_struct *new_vma, pmd_t *new_pmd,
		unsigned long new_addr)
		unsigned long new_addr, bool need_rmap_locks)
		{
		struct address_space *mapping = NULL;
		struct anon_vma *anon_vma = vma->anon_vma;
		struct anon_vma *anon_vma = NULL;
		struct mm_struct *mm = vma->vm_mm;
		pte_t *old_pte, *new_pte, pte;
		spinlock_t *old_ptl, *new_ptl;

		if (vma->vm_file) {
		/*
		* Subtle point from Rajesh Venkatasubramanian: before
		* moving file-based ptes, we must lock truncate_pagecache
		* out, since it might clean the dst vma before the src vma,
		* and we propagate stale pages into the dst afterward.
		* When need_rmap_locks is true, we take the i_mmap_mutex and anon_vma
		* locks to ensure that rmap will always observe either the old or the
		* new ptes. This is the easiest way to avoid races with
		* truncate_pagecache(), page migration, etc...
		*
		* When need_rmap_locks is false, we use other ways to avoid
		* such races:
		*
		* - During exec() shift_arg_pages(), we use a specially tagged vma
		* which rmap call sites look for using is_vma_temporary_stack().
		*
		* - During mremap(), new_vma is often known to be placed after vma
		* in rmap traversal order. This ensures rmap will always observe
		* either the old pte, or the new pte, or both (the page table locks
		* serialize access to individual ptes, but only rmap traversal
		* order guarantees that we won't miss both the old and new ptes).
		*/
		if (need_rmap_locks) {
		if (vma->vm_file) {
		mapping = vma->vm_file->f_mapping;
		mutex_lock(&mapping->i_mmap_mutex);
		}
		if (anon_vma)
		if (vma->anon_vma) {
		anon_vma = vma->anon_vma;
		anon_vma_lock(anon_vma);
		}
		}

		/*
		* We don't have to worry about the ordering of src and dst
		@@ -127,7 +143,8 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,

		unsigned long move_page_tables(struct vm_area_struct *vma,
		unsigned long old_addr, struct vm_area_struct *new_vma,
		unsigned long new_addr, unsigned long len)
		unsigned long new_addr, unsigned long len,
		bool need_rmap_locks)
		{
		unsigned long extent, next, old_end;
		pmd_t *old_pmd, *new_pmd;
		@@ -174,7 +191,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
		if (extent > LATENCY_LIMIT)
		extent = LATENCY_LIMIT;
		move_ptes(vma, old_pmd, old_addr, old_addr + extent,
		new_vma, new_pmd, new_addr);
		new_vma, new_pmd, new_addr, need_rmap_locks);
		need_flush = true;
		}
		if (likely(need_flush))
		@@ -198,6 +215,7 @@ static unsigned long move_vma(struct vm_area_struct *vma,
		unsigned long hiwater_vm;
		int split = 0;
		int err;
		bool need_rmap_locks;

		/*
		* We'd prefer to avoid failure later on in do_munmap:
		@@ -219,18 +237,21 @@ static unsigned long move_vma(struct vm_area_struct *vma,
		return err;

		new_pgoff = vma->vm_pgoff + ((old_addr - vma->vm_start) >> PAGE_SHIFT);
		new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff);
		new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff,
		&need_rmap_locks);
		if (!new_vma)
		return -ENOMEM;

		moved_len = move_page_tables(vma, old_addr, new_vma, new_addr, old_len);
		moved_len = move_page_tables(vma, old_addr, new_vma, new_addr, old_len,
		need_rmap_locks);
		if (moved_len < old_len) {
		/*
		* On error, move entries back from new area to old,
		* which will succeed since page tables still there,
		* and then proceed to unmap new area instead of old.
		*/
		move_page_tables(new_vma, new_addr, vma, old_addr, moved_len);
		move_page_tables(new_vma, new_addr, vma, old_addr, moved_len,
		true);
		vma = new_vma;
		old_len = new_len;
		old_addr = new_addr;