Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 6f4576e3 authored by Naoya Horiguchi, committed by Linus Torvalds
Browse files

mempolicy: apply page table walker on queue_pages_range()



queue_pages_range() currently walks the page tables in its own way, which
duplicates some code.  This patch applies the page table walker to reduce
the number of lines of code.

queue_pages_range() has to do some prechecks to determine whether we really
walk over the vma or just skip it.  Now that mm_walk has the test_walk()
callback for this purpose, we can do this replacement cleanly.
queue_pages_test_walk() depends not only on the current vma but also on the
previous one, so queue_pages->prev is introduced to remember it.

Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 1757bbd9
Loading
Loading
Loading
Loading
+92 −136
Original line number Diff line number Diff line
@@ -471,24 +471,34 @@ static const struct mempolicy_operations mpol_ops[MPOL_MAX] = {
static void migrate_page_add(struct page *page, struct list_head *pagelist,
				unsigned long flags);

/*
 * State shared by the queue_pages_* page table walker callbacks; it is
 * handed to them through mm_walk->private.
 */
struct queue_pages {
	/* list that selected pages are queued on for migration */
	struct list_head *pagelist;
	/* MPOL_MF_* flags that control which pages are selected */
	unsigned long flags;
	/* node mask that each page's node id is tested against */
	nodemask_t *nmask;
	/* vma recorded by the previous queue_pages_test_walk() call */
	struct vm_area_struct *prev;
};

/*
 * Scan through pages checking if pages follow certain conditions,
 * and move them to the pagelist if they do.
 */
static int queue_pages_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
		unsigned long addr, unsigned long end,
		const nodemask_t *nodes, unsigned long flags,
		void *private)
static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
			unsigned long end, struct mm_walk *walk)
{
	pte_t *orig_pte;
	struct vm_area_struct *vma = walk->vma;
	struct page *page;
	struct queue_pages *qp = walk->private;
	unsigned long flags = qp->flags;
	int nid;
	pte_t *pte;
	spinlock_t *ptl;

	orig_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
	do {
		struct page *page;
		int nid;
	split_huge_page_pmd(vma, addr, pmd);
	if (pmd_trans_unstable(pmd))
		return 0;

	pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
	for (; addr != end; pte++, addr += PAGE_SIZE) {
		if (!pte_present(*pte))
			continue;
		page = vm_normal_page(vma, addr, *pte);
@@ -501,114 +511,46 @@ static int queue_pages_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
		if (PageReserved(page))
			continue;
		nid = page_to_nid(page);
		if (node_isset(nid, *nodes) == !!(flags & MPOL_MF_INVERT))
		if (node_isset(nid, *qp->nmask) == !!(flags & MPOL_MF_INVERT))
			continue;

		if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
			migrate_page_add(page, private, flags);
		else
			break;
	} while (pte++, addr += PAGE_SIZE, addr != end);
	pte_unmap_unlock(orig_pte, ptl);
	return addr != end;
			migrate_page_add(page, qp->pagelist, flags);
	}
	pte_unmap_unlock(pte - 1, ptl);
	cond_resched();
	return 0;
}

static void queue_pages_hugetlb_pmd_range(struct vm_area_struct *vma,
		pmd_t *pmd, const nodemask_t *nodes, unsigned long flags,
				    void *private)
static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask,
			       unsigned long addr, unsigned long end,
			       struct mm_walk *walk)
{
#ifdef CONFIG_HUGETLB_PAGE
	struct queue_pages *qp = walk->private;
	unsigned long flags = qp->flags;
	int nid;
	struct page *page;
	spinlock_t *ptl;
	pte_t entry;

	ptl = huge_pte_lock(hstate_vma(vma), vma->vm_mm, (pte_t *)pmd);
	entry = huge_ptep_get((pte_t *)pmd);
	ptl = huge_pte_lock(hstate_vma(walk->vma), walk->mm, pte);
	entry = huge_ptep_get(pte);
	if (!pte_present(entry))
		goto unlock;
	page = pte_page(entry);
	nid = page_to_nid(page);
	if (node_isset(nid, *nodes) == !!(flags & MPOL_MF_INVERT))
	if (node_isset(nid, *qp->nmask) == !!(flags & MPOL_MF_INVERT))
		goto unlock;
	/* With MPOL_MF_MOVE, we migrate only unshared hugepage. */
	if (flags & (MPOL_MF_MOVE_ALL) ||
	    (flags & MPOL_MF_MOVE && page_mapcount(page) == 1))
		isolate_huge_page(page, private);
		isolate_huge_page(page, qp->pagelist);
unlock:
	spin_unlock(ptl);
#else
	BUG();
#endif
}

/*
 * Visit every pmd entry under @pud that covers the range from @addr to @end.
 *
 * Entries that are not present are skipped.  A huge pmd in a hugetlb vma is
 * handed to queue_pages_hugetlb_pmd_range(); any other entry is passed to
 * split_huge_page_pmd() and, unless pmd_none_or_trans_huge_or_clear_bad()
 * rejects it, scanned with queue_pages_pte_range().
 *
 * Returns -EIO as soon as queue_pages_pte_range() returns non-zero,
 * otherwise 0.
 */
static inline int queue_pages_pmd_range(struct vm_area_struct *vma, pud_t *pud,
		unsigned long addr, unsigned long end,
		const nodemask_t *nodes, unsigned long flags,
		void *private)
{
	pmd_t *pmd;
	unsigned long next;

	pmd = pmd_offset(pud, addr);
	/*
	 * Note: "continue" inside a do-while jumps to the loop condition, so
	 * pmd and addr are still advanced for skipped entries.
	 */
	do {
		next = pmd_addr_end(addr, end);
		if (!pmd_present(*pmd))
			continue;
		if (pmd_huge(*pmd) && is_vm_hugetlb_page(vma)) {
			queue_pages_hugetlb_pmd_range(vma, pmd, nodes,
						flags, private);
			continue;
		}
		split_huge_page_pmd(vma, addr, pmd);
		if (pmd_none_or_trans_huge_or_clear_bad(pmd))
			continue;
		if (queue_pages_pte_range(vma, pmd, addr, next, nodes,
				    flags, private))
			return -EIO;
	} while (pmd++, addr = next, addr != end);
	return 0;
}

/*
 * Visit every pud entry under @pgd that covers the range from @addr to @end
 * and descend into queue_pages_pmd_range() for each usable one.
 *
 * A huge pud in a hugetlb vma is skipped, as is any entry rejected by
 * pud_none_or_clear_bad().
 *
 * Returns -EIO as soon as queue_pages_pmd_range() returns non-zero,
 * otherwise 0.
 */
static inline int queue_pages_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
		unsigned long addr, unsigned long end,
		const nodemask_t *nodes, unsigned long flags,
		void *private)
{
	pud_t *pud;
	unsigned long next;

	pud = pud_offset(pgd, addr);
	/* "continue" still runs the loop condition, advancing pud and addr. */
	do {
		next = pud_addr_end(addr, end);
		if (pud_huge(*pud) && is_vm_hugetlb_page(vma))
			continue;
		if (pud_none_or_clear_bad(pud))
			continue;
		if (queue_pages_pmd_range(vma, pud, addr, next, nodes,
				    flags, private))
			return -EIO;
	} while (pud++, addr = next, addr != end);
	return 0;
}

/*
 * Visit every pgd entry of @vma's mm that covers the range from @addr to
 * @end and descend into queue_pages_pud_range() for each usable one.
 *
 * Entries rejected by pgd_none_or_clear_bad() are skipped.
 *
 * Returns -EIO as soon as queue_pages_pud_range() returns non-zero,
 * otherwise 0.
 */
static inline int queue_pages_pgd_range(struct vm_area_struct *vma,
		unsigned long addr, unsigned long end,
		const nodemask_t *nodes, unsigned long flags,
		void *private)
{
	pgd_t *pgd;
	unsigned long next;

	pgd = pgd_offset(vma->vm_mm, addr);
	/* "continue" still runs the loop condition, advancing pgd and addr. */
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd))
			continue;
		if (queue_pages_pud_range(vma, pgd, addr, next, nodes,
				    flags, private))
			return -EIO;
	} while (pgd++, addr = next, addr != end);
	return 0;
}

@@ -641,26 +583,13 @@ static unsigned long change_prot_numa(struct vm_area_struct *vma,
}
#endif /* CONFIG_NUMA_BALANCING */

/*
 * Walk through page tables and collect pages to be migrated.
 *
 * If pages found in a given range are on a set of nodes (determined by
 * @nodes and @flags,) it's isolated and queued to the pagelist which is
 * passed via @private.)
 */
static int
queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
		const nodemask_t *nodes, unsigned long flags, void *private)
static int queue_pages_test_walk(unsigned long start, unsigned long end,
				struct mm_walk *walk)
{
	int err = 0;
	struct vm_area_struct *vma, *prev;

	vma = find_vma(mm, start);
	if (!vma)
		return -EFAULT;
	prev = NULL;
	for (; vma && vma->vm_start < end; vma = vma->vm_next) {
	struct vm_area_struct *vma = walk->vma;
	struct queue_pages *qp = walk->private;
	unsigned long endvma = vma->vm_end;
	unsigned long flags = qp->flags;

	if (endvma > end)
		endvma = end;
@@ -670,30 +599,57 @@ queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
	if (!(flags & MPOL_MF_DISCONTIG_OK)) {
		if (!vma->vm_next && vma->vm_end < end)
			return -EFAULT;
			if (prev && prev->vm_end < vma->vm_start)
		if (qp->prev && qp->prev->vm_end < vma->vm_start)
			return -EFAULT;
	}

	qp->prev = vma;

	if (vma->vm_flags & VM_PFNMAP)
		return 1;

	if (flags & MPOL_MF_LAZY) {
		/* Similar to task_numa_work, skip inaccessible VMAs */
		if (vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))
			change_prot_numa(vma, start, endvma);
			goto next;
		return 1;
	}

	if ((flags & MPOL_MF_STRICT) ||
	    ((flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) &&
		      vma_migratable(vma))) {

			err = queue_pages_pgd_range(vma, start, endvma, nodes,
						flags, private);
			if (err)
				break;
		}
next:
		prev = vma;
	     vma_migratable(vma)))
		/* queue pages from current vma */
		return 0;
	return 1;
}
	return err;

/*
 * Walk the page tables of @mm between @start and @end and collect the
 * pages that are to be migrated.
 *
 * The traversal itself is done by walk_page_range().  For each vma,
 * queue_pages_test_walk() decides whether it is walked or skipped; pages
 * selected according to @nodes and @flags are queued on @pagelist.
 *
 * Returns the result of walk_page_range().
 */
static int
queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
		nodemask_t *nodes, unsigned long flags,
		struct list_head *pagelist)
{
	/* State shared by the callbacks; no vma has been visited yet. */
	struct queue_pages ctx = {
		.prev = NULL,
		.nmask = nodes,
		.flags = flags,
		.pagelist = pagelist,
	};
	struct mm_walk walk = {
		.mm = mm,
		.private = &ctx,
		.test_walk = queue_pages_test_walk,
		.pmd_entry = queue_pages_pte_range,
		.hugetlb_entry = queue_pages_hugetlb,
	};

	return walk_page_range(start, end, &walk);
}

/*