
Commit 4fc3f1d6 authored by Ingo Molnar, committed by Mel Gorman

mm/rmap, migration: Make rmap_walk_anon() and try_to_unmap_anon() more scalable



rmap_walk_anon() and try_to_unmap_anon() appear to be too
careful about locking the anon vma: while they need protection
against anon vma list modifications, they do not need exclusive
access to the list itself.
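
The distinction is the classic reader/writer split. As a minimal
userspace illustration - plain pthreads, hypothetical walker/modifier
names, not kernel code - any number of walkers can traverse a shared
list concurrently under the read side of an rwlock, while modifications
still take it exclusively:

/* Build with: gcc -std=c99 -pthread sketch.c */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct node {
	int val;
	struct node *next;
};

static struct node *head;
static pthread_rwlock_t list_lock = PTHREAD_RWLOCK_INITIALIZER;

/* Walkers only need the list to stay stable while they traverse it:
 * a shared (read) lock suffices, and many can hold it at once. */
static void *walker(void *arg)
{
	(void)arg;
	pthread_rwlock_rdlock(&list_lock);
	for (struct node *n = head; n; n = n->next)
		;	/* visit n - e.g. an rmap_one()-style callback */
	pthread_rwlock_unlock(&list_lock);
	return NULL;
}

/* Only actual list modification needs the exclusive (write) lock. */
static void *modifier(void *arg)
{
	struct node *n = malloc(sizeof(*n));

	(void)arg;
	if (!n)
		return NULL;
	n->val = 42;
	pthread_rwlock_wrlock(&list_lock);
	n->next = head;
	head = n;
	pthread_rwlock_unlock(&list_lock);
	return NULL;
}

int main(void)
{
	pthread_t t[4];

	pthread_create(&t[0], NULL, modifier, NULL);
	for (int i = 1; i < 4; i++)
		pthread_create(&t[i], NULL, walker, NULL);
	for (int i = 0; i < 4; i++)
		pthread_join(t[i], NULL);
	printf("walks and update completed\n");
	return 0;
}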

Transforming this exclusive lock into a read-locked rwsem removes
a global lock from the hot path of page-migration-intensive
threaded workloads, where it otherwise causes pathological
performance like this:

    96.43%        process 0  [kernel.kallsyms]  [k] perf_trace_sched_switch
                  |
                  --- perf_trace_sched_switch
                      __schedule
                      schedule
                      schedule_preempt_disabled
                      __mutex_lock_common.isra.6
                      __mutex_lock_slowpath
                      mutex_lock
                     |
                     |--50.61%-- rmap_walk
                     |          move_to_new_page
                     |          migrate_pages
                     |          migrate_misplaced_page
                     |          __do_numa_page.isra.69
                     |          handle_pte_fault
                     |          handle_mm_fault
                     |          __do_page_fault
                     |          do_page_fault
                     |          page_fault
                     |          __memset_sse2
                     |          |
                     |           --100.00%-- worker_thread
                     |                     |
                     |                      --100.00%-- start_thread
                     |
                      --49.39%-- page_lock_anon_vma
                                try_to_unmap_anon
                                try_to_unmap
                                migrate_pages
                                migrate_misplaced_page
                                __do_numa_page.isra.69
                                handle_pte_fault
                                handle_mm_fault
                                __do_page_fault
                                do_page_fault
                                page_fault
                                __memset_sse2
                                |
                                 --100.00%-- worker_thread
                                           start_thread

With this change applied, the profile is nicely flat
and there is no anon-vma-related scheduling/blocking.
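
Concretely, the anon rmap walk now takes the anon vma lock for
reading only. A simplified sketch of the resulting shape, modelled
on the anon_vma_lock_read()/anon_vma_unlock_read() helpers added in
the rmap.h hunk below (the mm/rmap.c change itself is not visible on
this page, so details here are approximate):

static int rmap_walk_anon(struct page *page,
			  int (*rmap_one)(struct page *, struct vm_area_struct *,
					  unsigned long, void *),
			  void *arg)
{
	struct anon_vma *anon_vma = page_anon_vma(page);
	struct anon_vma_chain *avc;
	int ret = SWAP_AGAIN;

	if (!anon_vma)
		return ret;

	/* Shared lock: concurrent walkers no longer serialize here. */
	anon_vma_lock_read(anon_vma);
	anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, 0, ULONG_MAX) {
		struct vm_area_struct *vma = avc->vma;

		ret = rmap_one(page, vma, vma_address(page, vma), arg);
		if (ret != SWAP_AGAIN)
			break;
	}
	anon_vma_unlock_read(anon_vma);
	return ret;
}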

Also rename anon_vma_[un]lock() => anon_vma_[un]lock_write(),
to make it clearer that an exclusive write lock is taken in
that case - suggested by Rik van Riel.
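
For quick reference, the lock-API renames and additions visible in
the hunks below:

    anon_vma_lock()        => anon_vma_lock_write()       exclusive (down_write)
    page_lock_anon_vma()   => page_lock_anon_vma_read()   shared (down_read)
    page_unlock_anon_vma() => page_unlock_anon_vma_read()
    new: anon_vma_lock_read() / anon_vma_unlock_read()    shared (down_read)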

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Turner <pjt@google.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Mel Gorman <mgorman@suse.de>
parent 5a505085
include/linux/huge_mm.h (+1 −1)

@@ -102,7 +102,7 @@ extern void __split_huge_page_pmd(struct mm_struct *mm, pmd_t *pmd);
 #define wait_split_huge_page(__anon_vma, __pmd)				\
 	do {								\
 		pmd_t *____pmd = (__pmd);				\
-		anon_vma_lock(__anon_vma);				\
+		anon_vma_lock_write(__anon_vma);			\
 		anon_vma_unlock(__anon_vma);				\
 		BUG_ON(pmd_trans_splitting(*____pmd) ||			\
 		       pmd_trans_huge(*____pmd));			\
include/linux/rmap.h (+14 −3)

@@ -118,7 +118,7 @@ static inline void vma_unlock_anon_vma(struct vm_area_struct *vma)
 		up_write(&anon_vma->root->rwsem);
 }
 
-static inline void anon_vma_lock(struct anon_vma *anon_vma)
+static inline void anon_vma_lock_write(struct anon_vma *anon_vma)
 {
 	down_write(&anon_vma->root->rwsem);
 }
@@ -128,6 +128,17 @@ static inline void anon_vma_unlock(struct anon_vma *anon_vma)
 	up_write(&anon_vma->root->rwsem);
 }
 
+static inline void anon_vma_lock_read(struct anon_vma *anon_vma)
+{
+	down_read(&anon_vma->root->rwsem);
+}
+
+static inline void anon_vma_unlock_read(struct anon_vma *anon_vma)
+{
+	up_read(&anon_vma->root->rwsem);
+}
+
 /*
  * anon_vma helper functions.
  */
@@ -220,8 +231,8 @@ int try_to_munlock(struct page *);
 /*
  * Called by memory-failure.c to kill processes.
  */
-struct anon_vma *page_lock_anon_vma(struct page *page);
-void page_unlock_anon_vma(struct anon_vma *anon_vma);
+struct anon_vma *page_lock_anon_vma_read(struct page *page);
+void page_unlock_anon_vma_read(struct anon_vma *anon_vma);
 int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma);
 
 /*
mm/huge_memory.c (+3 −3)

@@ -1549,7 +1549,7 @@ int split_huge_page(struct page *page)
 	int ret = 1;
 
 	BUG_ON(!PageAnon(page));
-	anon_vma = page_lock_anon_vma(page);
+	anon_vma = page_lock_anon_vma_read(page);
 	if (!anon_vma)
 		goto out;
 	ret = 0;
@@ -1562,7 +1562,7 @@ int split_huge_page(struct page *page)
 
 	BUG_ON(PageCompound(page));
 out_unlock:
-	page_unlock_anon_vma(anon_vma);
+	page_unlock_anon_vma_read(anon_vma);
 out:
 	return ret;
 }
@@ -2074,7 +2074,7 @@ static void collapse_huge_page(struct mm_struct *mm,
 	if (!pmd_present(*pmd) || pmd_trans_huge(*pmd))
 		goto out;
 
-	anon_vma_lock(vma->anon_vma);
+	anon_vma_lock_write(vma->anon_vma);
 
 	pte = pte_offset_map(pmd, address);
 	ptl = pte_lockptr(mm, pmd);
mm/ksm.c (+3 −3)

@@ -1634,7 +1634,7 @@ int page_referenced_ksm(struct page *page, struct mem_cgroup *memcg,
 		struct anon_vma_chain *vmac;
 		struct vm_area_struct *vma;
 
-		anon_vma_lock(anon_vma);
+		anon_vma_lock_write(anon_vma);
 		anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root,
 					       0, ULONG_MAX) {
 			vma = vmac->vma;
@@ -1688,7 +1688,7 @@ int try_to_unmap_ksm(struct page *page, enum ttu_flags flags)
 		struct anon_vma_chain *vmac;
 		struct vm_area_struct *vma;
 
-		anon_vma_lock(anon_vma);
+		anon_vma_lock_write(anon_vma);
 		anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root,
 					       0, ULONG_MAX) {
 			vma = vmac->vma;
@@ -1741,7 +1741,7 @@ int rmap_walk_ksm(struct page *page, int (*rmap_one)(struct page *,
 		struct anon_vma_chain *vmac;
 		struct vm_area_struct *vma;
 
-		anon_vma_lock(anon_vma);
+		anon_vma_lock_write(anon_vma);
 		anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root,
 					       0, ULONG_MAX) {
 			vma = vmac->vma;
mm/memory-failure.c (+2 −2)

@@ -402,7 +402,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill,
 	struct anon_vma *av;
 	pgoff_t pgoff;
 
-	av = page_lock_anon_vma(page);
+	av = page_lock_anon_vma_read(page);
 	if (av == NULL)	/* Not actually mapped anymore */
 		return;
 
@@ -423,7 +423,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill,
 		}
 	}
 	read_unlock(&tasklist_lock);
-	page_unlock_anon_vma(av);
+	page_unlock_anon_vma_read(av);
 }
 
 /*