Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 5b65c467 authored by Kirill A. Shutemov's avatar Kirill A. Shutemov Committed by Ingo Molnar
Browse files

mm, x86/mm: Fix performance regression in get_user_pages_fast()

The 0-day test bot found a performance regression that was tracked down to
switching x86 to the generic get_user_pages_fast() implementation:

  http://lkml.kernel.org/r/20170710024020.GA26389@yexl-desktop



The regression was caused by the fact that we now use local_irq_save() +
local_irq_restore() in get_user_pages_fast() to disable interrupts.
In x86 implementation local_irq_disable() + local_irq_enable() was used.

The fix is to make get_user_pages_fast() use local_irq_disable(),
leaving local_irq_save() for __get_user_pages_fast() that can be called
with interrupts disabled.

Numbers for pinning a gigabyte of memory, one page a time, 20 repeats:

  Before:  Average: 14.91 ms, stddev: 0.45 ms
  After:   Average: 10.76 ms, stddev: 0.18 ms

Signed-off-by: default avatarKirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Thorsten Leemhuis <regressions@leemhuis.info>
Cc: linux-mm@kvack.org
Fixes: e585513b ("x86/mm/gup: Switch GUP to the generic get_user_page_fast() implementation")
Link: http://lkml.kernel.org/r/20170908215603.9189-3-kirill.shutemov@linux.intel.com


Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
parent 9e52fc2b
Loading
Loading
Loading
Loading
+58 −39
Original line number Diff line number Diff line
@@ -1618,6 +1618,47 @@ static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
	return 1;
}

static void gup_pgd_range(unsigned long addr, unsigned long end,
		int write, struct page **pages, int *nr)
{
	unsigned long next;
	pgd_t *pgdp;

	pgdp = pgd_offset(current->mm, addr);
	do {
		pgd_t pgd = READ_ONCE(*pgdp);

		next = pgd_addr_end(addr, end);
		if (pgd_none(pgd))
			return;
		if (unlikely(pgd_huge(pgd))) {
			if (!gup_huge_pgd(pgd, pgdp, addr, next, write,
					  pages, nr))
				return;
		} else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
			if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
					 PGDIR_SHIFT, next, write, pages, nr))
				return;
		} else if (!gup_p4d_range(pgd, addr, next, write, pages, nr))
			return;
	} while (pgdp++, addr = next, addr != end);
}

#ifndef gup_fast_permitted
/*
 * Check if it's allowed to use __get_user_pages_fast() for the range, or
 * we need to fall back to the slow version:
 */
bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
{
	unsigned long len, end;

	len = (unsigned long) nr_pages << PAGE_SHIFT;
	end = start + len;
	return end >= start;
}
#endif

/*
 * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall back to
 * the regular GUP. It will only return non-negative values.
@@ -1625,10 +1666,8 @@ static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
			  struct page **pages)
{
	struct mm_struct *mm = current->mm;
	unsigned long addr, len, end;
	unsigned long next, flags;
	pgd_t *pgdp;
	unsigned long flags;
	int nr = 0;

	start &= PAGE_MASK;
@@ -1652,44 +1691,14 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
	 * block IPIs that come from THPs splitting.
	 */

	if (gup_fast_permitted(start, nr_pages, write)) {
		local_irq_save(flags);
	pgdp = pgd_offset(mm, addr);
	do {
		pgd_t pgd = READ_ONCE(*pgdp);

		next = pgd_addr_end(addr, end);
		if (pgd_none(pgd))
			break;
		if (unlikely(pgd_huge(pgd))) {
			if (!gup_huge_pgd(pgd, pgdp, addr, next, write,
					  pages, &nr))
				break;
		} else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
			if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
					 PGDIR_SHIFT, next, write, pages, &nr))
				break;
		} else if (!gup_p4d_range(pgd, addr, next, write, pages, &nr))
			break;
	} while (pgdp++, addr = next, addr != end);
		gup_pgd_range(addr, end, write, pages, &nr);
		local_irq_restore(flags);

	return nr;
	}

#ifndef gup_fast_permitted
/*
 * Check if it's allowed to use __get_user_pages_fast() for the range, or
 * we need to fall back to the slow version:
 */
bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
{
	unsigned long len, end;

	len = (unsigned long) nr_pages << PAGE_SHIFT;
	end = start + len;
	return end >= start;
	return nr;
}
#endif

/**
 * get_user_pages_fast() - pin user pages in memory
@@ -1710,12 +1719,22 @@ bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
int get_user_pages_fast(unsigned long start, int nr_pages, int write,
			struct page **pages)
{
	unsigned long addr, len, end;
	int nr = 0, ret = 0;

	start &= PAGE_MASK;
	addr = start;
	len = (unsigned long) nr_pages << PAGE_SHIFT;
	end = start + len;

	if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
					(void __user *)start, len)))
		return 0;

	if (gup_fast_permitted(start, nr_pages, write)) {
		nr = __get_user_pages_fast(start, nr_pages, write, pages);
		local_irq_disable();
		gup_pgd_range(addr, end, write, pages, &nr);
		local_irq_enable();
		ret = nr;
	}