Commit 3565fce3 authored by Dan Williams, committed by Linus Torvalds

mm, x86: get_user_pages() for dax mappings



A dax mapping establishes a pte with _PAGE_DEVMAP set when the driver
has established a devm_memremap_pages() mapping, i.e. when the pfn_t
returned from ->direct_access() has PFN_DEV and PFN_MAP set.  Later,
when encountering _PAGE_DEVMAP during a page table walk, we look up
and pin a struct dev_pagemap instance to keep the result of
pfn_to_page() valid until put_page().
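
For illustration, a minimal sketch of that lookup-and-pin pattern,
condensed from the gup_pte_range() hunk below (pin_devmap_pte() is a
hypothetical helper, not part of this patch; unwinding of earlier pins
via undo_dev_pagemap() and the pte_unmap() bookkeeping are elided):

/*
 * Hypothetical helper condensing the pte_devmap() handling that this
 * patch adds to gup_pte_range(); error unwinding is elided.
 */
static int pin_devmap_pte(pte_t pte, struct dev_pagemap **pgmap,
		struct page **pages, int *nr)
{
	struct page *page = pte_page(pte);

	if (pte_devmap(pte)) {
		/* pin the pagemap so pfn_to_page() stays valid */
		*pgmap = get_dev_pagemap(pte_pfn(pte), *pgmap);
		if (unlikely(!*pgmap))
			return 0;	/* device memory being torn down */
	}
	get_page(page);		/* takes a pgmap ref for ZONE_DEVICE pages */
	put_dev_pagemap(*pgmap);	/* get_page() now holds the reference */
	pages[(*nr)++] = page;
	return 1;
}

The same pattern, applied per-pfn at PMD granularity, drives the new
__gup_device_huge_pmd() helper.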

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Tested-by: Logan Gunthorpe <logang@deltatee.com>
Cc: Dave Hansen <dave@sr71.net>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 5c7fb56e
arch/x86/include/asm/pgtable.h: +7 −0
@@ -479,6 +479,13 @@ static inline int pte_present(pte_t a)
 	return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE);
 }
 
+#ifdef __HAVE_ARCH_PTE_DEVMAP
+static inline int pte_devmap(pte_t a)
+{
+	return (pte_flags(a) & _PAGE_DEVMAP) == _PAGE_DEVMAP;
+}
+#endif
+
 #define pte_accessible pte_accessible
 static inline bool pte_accessible(struct mm_struct *mm, pte_t a)
 {
arch/x86/mm/gup.c: +54 −3
@@ -9,6 +9,7 @@
 #include <linux/vmstat.h>
 #include <linux/highmem.h>
 #include <linux/swap.h>
+#include <linux/memremap.h>
 
 #include <asm/pgtable.h>
 
@@ -63,6 +64,16 @@ static inline pte_t gup_get_pte(pte_t *ptep)
 #endif
 }
 
+static void undo_dev_pagemap(int *nr, int nr_start, struct page **pages)
+{
+	while ((*nr) - nr_start) {
+		struct page *page = pages[--(*nr)];
+
+		ClearPageReferenced(page);
+		put_page(page);
+	}
+}
+
 /*
  * The performance critical leaf functions are made noinline otherwise gcc
  * inlines everything into a single function which results in too much
@@ -71,7 +82,9 @@ static inline pte_t gup_get_pte(pte_t *ptep)
 static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
 		unsigned long end, int write, struct page **pages, int *nr)
 {
+	struct dev_pagemap *pgmap = NULL;
 	unsigned long mask;
+	int nr_start = *nr;
 	pte_t *ptep;
 
 	mask = _PAGE_PRESENT|_PAGE_USER;
@@ -89,13 +102,21 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
 			return 0;
 		}
 
-		if ((pte_flags(pte) & (mask | _PAGE_SPECIAL)) != mask) {
+		page = pte_page(pte);
+		if (pte_devmap(pte)) {
+			pgmap = get_dev_pagemap(pte_pfn(pte), pgmap);
+			if (unlikely(!pgmap)) {
+				undo_dev_pagemap(nr, nr_start, pages);
+				pte_unmap(ptep);
+				return 0;
+			}
+		} else if ((pte_flags(pte) & (mask | _PAGE_SPECIAL)) != mask) {
 			pte_unmap(ptep);
 			return 0;
 		}
 		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
-		page = pte_page(pte);
 		get_page(page);
+		put_dev_pagemap(pgmap);
 		SetPageReferenced(page);
 		pages[*nr] = page;
 		(*nr)++;
@@ -114,6 +135,32 @@ static inline void get_head_page_multiple(struct page *page, int nr)
 	SetPageReferenced(page);
 }
 
+static int __gup_device_huge_pmd(pmd_t pmd, unsigned long addr,
+		unsigned long end, struct page **pages, int *nr)
+{
+	int nr_start = *nr;
+	unsigned long pfn = pmd_pfn(pmd);
+	struct dev_pagemap *pgmap = NULL;
+
+	pfn += (addr & ~PMD_MASK) >> PAGE_SHIFT;
+	do {
+		struct page *page = pfn_to_page(pfn);
+
+		pgmap = get_dev_pagemap(pfn, pgmap);
+		if (unlikely(!pgmap)) {
+			undo_dev_pagemap(nr, nr_start, pages);
+			return 0;
+		}
+		SetPageReferenced(page);
+		pages[*nr] = page;
+		get_page(page);
+		put_dev_pagemap(pgmap);
+		(*nr)++;
+		pfn++;
+	} while (addr += PAGE_SIZE, addr != end);
+	return 1;
+}
+
 static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
 		unsigned long end, int write, struct page **pages, int *nr)
 {
@@ -126,9 +173,13 @@ static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
 		mask |= _PAGE_RW;
 	if ((pmd_flags(pmd) & mask) != mask)
 		return 0;
+
+	VM_BUG_ON(!pfn_valid(pmd_pfn(pmd)));
+	if (pmd_devmap(pmd))
+		return __gup_device_huge_pmd(pmd, addr, end, pages, nr);
+
 	/* hugepages are never "special" */
 	VM_BUG_ON(pmd_flags(pmd) & _PAGE_SPECIAL);
-	VM_BUG_ON(!pfn_valid(pmd_pfn(pmd)));
 
 	refs = 0;
 	head = pmd_page(pmd);
include/linux/huge_mm.h: +9 −1
@@ -38,7 +38,6 @@ extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 			int prot_numa);
 int vmf_insert_pfn_pmd(struct vm_area_struct *, unsigned long addr, pmd_t *,
 			pfn_t pfn, bool write);
-
 enum transparent_hugepage_flag {
 	TRANSPARENT_HUGEPAGE_FLAG,
 	TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG,
@@ -55,6 +54,9 @@ enum transparent_hugepage_flag {
 #define HPAGE_PMD_NR (1<<HPAGE_PMD_ORDER)
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
+		pmd_t *pmd, int flags);
+
 #define HPAGE_PMD_SHIFT PMD_SHIFT
 #define HPAGE_PMD_SIZE	((1UL) << HPAGE_PMD_SHIFT)
 #define HPAGE_PMD_MASK	(~(HPAGE_PMD_SIZE - 1))
@@ -205,6 +207,12 @@ static inline bool is_huge_zero_page(struct page *page)
 	return false;
 }
 
+
+static inline struct page *follow_devmap_pmd(struct vm_area_struct *vma,
+		unsigned long addr, pmd_t *pmd, int flags)
+{
+	return NULL;
+}
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 #endif /* _LINUX_HUGE_MM_H */
include/linux/mm.h: +41 −18
@@ -16,6 +16,7 @@
 #include <linux/mm_types.h>
 #include <linux/range.h>
 #include <linux/pfn.h>
+#include <linux/percpu-refcount.h>
 #include <linux/bit_spinlock.h>
 #include <linux/shrinker.h>
 #include <linux/resource.h>
@@ -465,17 +466,6 @@ static inline int page_count(struct page *page)
 	return atomic_read(&compound_head(page)->_count);
 }
 
-static inline void get_page(struct page *page)
-{
-	page = compound_head(page);
-	/*
-	 * Getting a normal page or the head of a compound page
-	 * requires to already have an elevated page->_count.
-	 */
-	VM_BUG_ON_PAGE(atomic_read(&page->_count) <= 0, page);
-	atomic_inc(&page->_count);
-}
-
 static inline struct page *virt_to_head_page(const void *x)
 {
 	struct page *page = virt_to_page(x);
@@ -494,13 +484,6 @@ static inline void init_page_count(struct page *page)
 
 void __put_page(struct page *page);
 
-static inline void put_page(struct page *page)
-{
-	page = compound_head(page);
-	if (put_page_testzero(page))
-		__put_page(page);
-}
-
 void put_pages_list(struct list_head *pages);
 
 void split_page(struct page *page, unsigned int order);
@@ -682,17 +665,50 @@ static inline enum zone_type page_zonenum(const struct page *page)
 }
 
 #ifdef CONFIG_ZONE_DEVICE
+void get_zone_device_page(struct page *page);
+void put_zone_device_page(struct page *page);
 static inline bool is_zone_device_page(const struct page *page)
 {
 	return page_zonenum(page) == ZONE_DEVICE;
 }
 #else
+static inline void get_zone_device_page(struct page *page)
+{
+}
+static inline void put_zone_device_page(struct page *page)
+{
+}
 static inline bool is_zone_device_page(const struct page *page)
 {
 	return false;
 }
 #endif
 
+static inline void get_page(struct page *page)
+{
+	page = compound_head(page);
+	/*
+	 * Getting a normal page or the head of a compound page
+	 * requires to already have an elevated page->_count.
+	 */
+	VM_BUG_ON_PAGE(atomic_read(&page->_count) <= 0, page);
+	atomic_inc(&page->_count);
+
+	if (unlikely(is_zone_device_page(page)))
+		get_zone_device_page(page);
+}
+
+static inline void put_page(struct page *page)
+{
+	page = compound_head(page);
+
+	if (put_page_testzero(page))
+		__put_page(page);
+
+	if (unlikely(is_zone_device_page(page)))
+		put_zone_device_page(page);
+}
+
 #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
 #define SECTION_IN_PAGE_FLAGS
 #endif
@@ -1444,6 +1460,13 @@ static inline void sync_mm_rss(struct mm_struct *mm)
 }
 #endif
 
+#ifndef __HAVE_ARCH_PTE_DEVMAP
+static inline int pte_devmap(pte_t pte)
+{
+	return 0;
+}
+#endif
+
 int vma_wants_writenotify(struct vm_area_struct *vma);
 
 extern pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr,
kernel/memremap.c: +12 −0
@@ -169,6 +169,18 @@ struct page_map {
 	struct vmem_altmap altmap;
 };
 
+void get_zone_device_page(struct page *page)
+{
+	percpu_ref_get(page->pgmap->ref);
+}
+EXPORT_SYMBOL(get_zone_device_page);
+
+void put_zone_device_page(struct page *page)
+{
+	put_dev_pagemap(page->pgmap);
+}
+EXPORT_SYMBOL(put_zone_device_page);
+
 static void pgmap_radix_release(struct resource *res)
 {
 	resource_size_t key;
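
For context, a hedged usage sketch (not part of this patch): with the
above in place, pages of a dax mapping established via
devm_memremap_pages() can be pinned through the regular fast GUP path,
and the dev_pagemap reference is held until the final put_page().
uaddr and the page count are illustrative values only:

	struct page *pages[16];
	int n;

	/* uaddr points into a dax (device-backed) user mapping */
	n = __get_user_pages_fast(uaddr, 16, 1, pages);

	/* ... DMA to / access the pinned device-backed pages ... */

	while (n-- > 0)
		put_page(pages[n]);	/* drops the dev_pagemap pin */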