
Commit b3b9c293 authored by Konstantin Khlebnikov, committed by Linus Torvalds

mm, x86, pat: rework linear pfn-mmap tracking



Replace the generic vma-flag VM_PFN_AT_MMAP with x86-only VM_PAT.

We can pass the mapping address from remap_pfn_range() into
track_pfn_remap() (formerly track_pfn_vma_new()), and collect all
PAT-related logic together in arch/x86/.
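
With the address available, track_pfn_remap() can detect a mapping that is
linear over the whole VMA without a dedicated vma flag: that is the common
case of a driver remapping its entire region at mmap time. A minimal sketch
of such a handler (mydrv_mmap and MYDRV_PHYS_BASE are hypothetical, for
illustration only, not part of this patch):

	/* Hypothetical driver mmap: map one device region linearly over the VMA. */
	static int mydrv_mmap(struct file *file, struct vm_area_struct *vma)
	{
		unsigned long size = vma->vm_end - vma->vm_start;

		/*
		 * Here addr == vma->vm_start and size spans the whole VMA, so
		 * the reworked track_pfn_remap() reserves the PAT range with a
		 * single reserve_pfn_range() call and sets VM_PAT on the vma.
		 */
		return remap_pfn_range(vma, vma->vm_start,
				       MYDRV_PHYS_BASE >> PAGE_SHIFT,
				       size, vma->vm_page_prot);
	}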

This patch also restores the original, frustration-free is_cow_mapping()
check in remap_pfn_range(), as it was before commit v2.6.28-rc8-88-g3c8bb73
("x86: PAT: store vm_pgoff for all linear_over_vma_region mappings - v3").

The is_linear_pfn_mapping() checks can be removed from mm/huge_memory.c,
because that case is already handled by the VM_PFNMAP bit in the VM_NO_THP
mask.
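
For reference, the VM_NO_THP mask in include/linux/huge_mm.h already contains
VM_PFNMAP (definition quoted approximately), and every linear pfn-mapping is a
VM_PFNMAP vma, so the extra is_linear_pfn_mapping() test added nothing:

	#define VM_NO_THP (VM_SPECIAL|VM_INSERTPAGE|VM_MIXEDMAP|VM_SAO| \
			   VM_PFNMAP|VM_HUGETLB|VM_SHARED|VM_MAYSHARE)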

[suresh.b.siddha@intel.com: Reset the VM_PAT flag as part of untrack_pfn_vma()]
Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Venkatesh Pallipadi <venki@google.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Carsten Otte <cotte@de.ibm.com>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Eric Paris <eparis@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: James Morris <james.l.morris@oracle.com>
Cc: Jason Baron <jbaron@redhat.com>
Cc: Kentaro Takeda <takedakn@nttdata.co.jp>
Cc: Matt Helsley <matthltc@us.ibm.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Venkatesh Pallipadi <venki@google.com>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 5180da41
arch/x86/mm/pat.c +12 −5
@@ -677,7 +677,7 @@ int track_pfn_copy(struct vm_area_struct *vma)
 	unsigned long vma_size = vma->vm_end - vma->vm_start;
 	pgprot_t pgprot;
 
-	if (is_linear_pfn_mapping(vma)) {
+	if (vma->vm_flags & VM_PAT) {
 		/*
 		 * reserve the whole chunk covered by vma. We need the
 		 * starting address and protection from pte.
@@ -699,14 +699,20 @@ int track_pfn_copy(struct vm_area_struct *vma)
  * single reserve_pfn_range call.
  */
 int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
-		    unsigned long pfn, unsigned long size)
+		    unsigned long pfn, unsigned long addr, unsigned long size)
 {
 	resource_size_t paddr = (resource_size_t)pfn << PAGE_SHIFT;
 	unsigned long flags;
 
 	/* reserve the whole chunk starting from paddr */
-	if (is_linear_pfn_mapping(vma))
-		return reserve_pfn_range(paddr, size, prot, 0);
+	if (addr == vma->vm_start && size == (vma->vm_end - vma->vm_start)) {
+		int ret;
+
+		ret = reserve_pfn_range(paddr, size, prot, 0);
+		if (!ret)
+			vma->vm_flags |= VM_PAT;
+		return ret;
+	}
 
 	if (!pat_enabled)
 		return 0;
@@ -758,7 +764,7 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
 	resource_size_t paddr;
 	unsigned long prot;
 
-	if (!is_linear_pfn_mapping(vma))
+	if (!(vma->vm_flags & VM_PAT))
 		return;
 
 	/* free the chunk starting from pfn or the whole chunk */
@@ -772,6 +778,7 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
 		size = vma->vm_end - vma->vm_start;
 	}
 	free_pfn_range(paddr, size);
+	vma->vm_flags &= ~VM_PAT;
 }
 
 pgprot_t pgprot_writecombine(pgprot_t prot)
include/asm-generic/pgtable.h +4 −2
@@ -391,7 +391,8 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm,
  * by remap_pfn_range() for physical range indicated by pfn and size.
  */
 static inline int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
-				  unsigned long pfn, unsigned long size)
+				  unsigned long pfn, unsigned long addr,
+				  unsigned long size)
 {
 	return 0;
 }
@@ -426,7 +427,8 @@ static inline void untrack_pfn(struct vm_area_struct *vma,
 }
 #else
 extern int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
-			   unsigned long pfn, unsigned long size);
+			   unsigned long pfn, unsigned long addr,
+			   unsigned long size);
 extern int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
 			    unsigned long pfn);
 extern int track_pfn_copy(struct vm_area_struct *vma);
include/linux/mm.h +1 −19
@@ -117,7 +117,7 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_CAN_NONLINEAR 0x08000000	/* Has ->fault & does nonlinear pages */
 #define VM_MIXEDMAP	0x10000000	/* Can contain "struct page" and pure PFN pages */
 #define VM_SAO		0x20000000	/* Strong Access Ordering (powerpc) */
-#define VM_PFN_AT_MMAP	0x40000000	/* PFNMAP vma that is fully mapped at mmap time */
+#define VM_PAT		0x40000000	/* PAT reserves whole VMA at once (x86) */
 #define VM_MERGEABLE	0x80000000	/* KSM may merge identical pages */
 
 /* Bits set in the VMA until the stack is in its final location */
@@ -158,24 +158,6 @@ extern pgprot_t protection_map[16];
 #define FAULT_FLAG_RETRY_NOWAIT	0x10	/* Don't drop mmap_sem and wait when retrying */
 #define FAULT_FLAG_KILLABLE	0x20	/* The fault task is in SIGKILL killable region */
 
-/*
- * This interface is used by x86 PAT code to identify a pfn mapping that is
- * linear over entire vma. This is to optimize PAT code that deals with
- * marking the physical region with a particular prot. This is not for generic
- * mm use. Note also that this check will not work if the pfn mapping is
- * linear for a vma starting at physical address 0. In which case PAT code
- * falls back to slow path of reserving physical range page by page.
- */
-static inline int is_linear_pfn_mapping(struct vm_area_struct *vma)
-{
-	return !!(vma->vm_flags & VM_PFN_AT_MMAP);
-}
-
-static inline int is_pfn_mapping(struct vm_area_struct *vma)
-{
-	return !!(vma->vm_flags & VM_PFNMAP);
-}
-
 /*
  * vm_fault is filled by the the pagefault handler and passed to the vma's
  * ->fault function. The vma's ->fault is responsible for returning a bitmask
mm/huge_memory.c +3 −16
@@ -1655,11 +1655,7 @@ int khugepaged_enter_vma_merge(struct vm_area_struct *vma)
 	if (vma->vm_ops)
 		/* khugepaged not yet working on file or special mappings */
 		return 0;
-	/*
-	 * If is_pfn_mapping() is true is_learn_pfn_mapping() must be
-	 * true too, verify it here.
-	 */
-	VM_BUG_ON(is_linear_pfn_mapping(vma) || vma->vm_flags & VM_NO_THP);
+	VM_BUG_ON(vma->vm_flags & VM_NO_THP);
 	hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
 	hend = vma->vm_end & HPAGE_PMD_MASK;
 	if (hstart < hend)
@@ -1912,11 +1908,7 @@ static void collapse_huge_page(struct mm_struct *mm,
 		goto out;
 	if (is_vma_temporary_stack(vma))
 		goto out;
-	/*
-	 * If is_pfn_mapping() is true is_learn_pfn_mapping() must be
-	 * true too, verify it here.
-	 */
-	VM_BUG_ON(is_linear_pfn_mapping(vma) || vma->vm_flags & VM_NO_THP);
+	VM_BUG_ON(vma->vm_flags & VM_NO_THP);
 
 	pgd = pgd_offset(mm, address);
 	if (!pgd_present(*pgd))
@@ -2154,12 +2146,7 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
 			goto skip;
 		if (is_vma_temporary_stack(vma))
 			goto skip;
-		/*
-		 * If is_pfn_mapping() is true is_learn_pfn_mapping()
-		 * must be true too, verify it here.
-		 */
-		VM_BUG_ON(is_linear_pfn_mapping(vma) ||
-			  vma->vm_flags & VM_NO_THP);
+		VM_BUG_ON(vma->vm_flags & VM_NO_THP);
 
 		hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
 		hend = vma->vm_end & HPAGE_PMD_MASK;
mm/memory.c +10 −16
@@ -1055,7 +1055,7 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	if (is_vm_hugetlb_page(vma))
 		return copy_hugetlb_page_range(dst_mm, src_mm, vma);
 
-	if (unlikely(is_pfn_mapping(vma))) {
+	if (unlikely(vma->vm_flags & VM_PFNMAP)) {
 		/*
 		 * We do not free on error cases below as remove_vma
 		 * gets called on error from higher level routine
@@ -1327,7 +1327,7 @@ static void unmap_single_vma(struct mmu_gather *tlb,
 	if (vma->vm_file)
 		uprobe_munmap(vma, start, end);
 
-	if (unlikely(is_pfn_mapping(vma)))
+	if (unlikely(vma->vm_flags & VM_PFNMAP))
 		untrack_pfn(vma, 0, 0);
 
 	if (start != end) {
@@ -2299,26 +2299,20 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
 	 * There's a horrible special case to handle copy-on-write
 	 * behaviour that some programs depend on. We mark the "original"
 	 * un-COW'ed pages by matching them up with "vma->vm_pgoff".
+	 * See vm_normal_page() for details.
 	 */
-	if (addr == vma->vm_start && end == vma->vm_end) {
+	if (is_cow_mapping(vma->vm_flags)) {
+		if (addr != vma->vm_start || end != vma->vm_end)
+			return -EINVAL;
 		vma->vm_pgoff = pfn;
-		vma->vm_flags |= VM_PFN_AT_MMAP;
-	} else if (is_cow_mapping(vma->vm_flags))
+	}
+
+	err = track_pfn_remap(vma, &prot, pfn, addr, PAGE_ALIGN(size));
+	if (err)
 		return -EINVAL;
 
 	vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
 
-	err = track_pfn_remap(vma, &prot, pfn, PAGE_ALIGN(size));
-	if (err) {
-		/*
-		 * To indicate that track_pfn related cleanup is not
-		 * needed from higher level routine calling unmap_vmas
-		 */
-		vma->vm_flags &= ~(VM_IO | VM_RESERVED | VM_PFNMAP);
-		vma->vm_flags &= ~VM_PFN_AT_MMAP;
-		return -EINVAL;
-	}
-
 	BUG_ON(addr >= end);
 	pfn -= addr >> PAGE_SHIFT;
 	pgd = pgd_offset(mm, addr);