Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 7b86ac33 authored by Christoph Hellwig, committed by Jason Gunthorpe
Browse files

pagewalk: separate function pointers from iterator data

The mm_walk structure currently mixes data and code.  Split out the
operations vectors into a new mm_walk_ops structure, and while we are
changing the API also declare the mm_walk structure inside the
walk_page_range and walk_page_vma functions.

Based on patch from Linus Torvalds.

Link: https://lore.kernel.org/r/20190828141955.22210-3-hch@lst.de


Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Thomas Hellstrom <thellstrom@vmware.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
parent a520110e
Loading
Loading
Loading
Loading
+12 −10
Original line number Original line Diff line number Diff line
@@ -44,6 +44,10 @@ page_set_nocache(pte_t *pte, unsigned long addr,
	return 0;
	return 0;
}
}


/*
 * Page-walk callbacks handed to walk_page_range() by arch_dma_alloc():
 * page_set_nocache() is installed as the pte_entry handler.  The table
 * holds only function pointers; the target mm (&init_mm) is passed
 * separately at the call site.
 */
static const struct mm_walk_ops set_nocache_walk_ops = {
	.pte_entry		= page_set_nocache,
};

static int
static int
page_clear_nocache(pte_t *pte, unsigned long addr,
page_clear_nocache(pte_t *pte, unsigned long addr,
		   unsigned long next, struct mm_walk *walk)
		   unsigned long next, struct mm_walk *walk)
@@ -59,6 +63,10 @@ page_clear_nocache(pte_t *pte, unsigned long addr,
	return 0;
	return 0;
}
}


/*
 * Page-walk callbacks handed to walk_page_range() by arch_dma_free():
 * page_clear_nocache() is installed as the pte_entry handler.  The
 * target mm (&init_mm) is passed separately at the call site.
 */
static const struct mm_walk_ops clear_nocache_walk_ops = {
	.pte_entry		= page_clear_nocache,
};

/*
/*
 * Alloc "coherent" memory, which for OpenRISC means simply uncached.
 * Alloc "coherent" memory, which for OpenRISC means simply uncached.
 *
 *
@@ -81,10 +89,6 @@ arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
{
{
	unsigned long va;
	unsigned long va;
	void *page;
	void *page;
	struct mm_walk walk = {
		.pte_entry = page_set_nocache,
		.mm = &init_mm
	};


	page = alloc_pages_exact(size, gfp | __GFP_ZERO);
	page = alloc_pages_exact(size, gfp | __GFP_ZERO);
	if (!page)
	if (!page)
@@ -99,7 +103,8 @@ arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
	 * We need to iterate through the pages, clearing the dcache for
	 * We need to iterate through the pages, clearing the dcache for
	 * them and setting the cache-inhibit bit.
	 * them and setting the cache-inhibit bit.
	 */
	 */
	if (walk_page_range(va, va + size, &walk)) {
	if (walk_page_range(&init_mm, va, va + size, &set_nocache_walk_ops,
			NULL)) {
		free_pages_exact(page, size);
		free_pages_exact(page, size);
		return NULL;
		return NULL;
	}
	}
@@ -112,13 +117,10 @@ arch_dma_free(struct device *dev, size_t size, void *vaddr,
		dma_addr_t dma_handle, unsigned long attrs)
		dma_addr_t dma_handle, unsigned long attrs)
{
{
	unsigned long va = (unsigned long)vaddr;
	unsigned long va = (unsigned long)vaddr;
	struct mm_walk walk = {
		.pte_entry = page_clear_nocache,
		.mm = &init_mm
	};


	/* walk_page_range shouldn't be able to fail here */
	/* walk_page_range shouldn't be able to fail here */
	WARN_ON(walk_page_range(va, va + size, &walk));
	WARN_ON(walk_page_range(&init_mm, va, va + size,
			&clear_nocache_walk_ops, NULL));


	free_pages_exact(vaddr, size);
	free_pages_exact(vaddr, size);
}
}
+5 −5
Original line number Original line Diff line number Diff line
@@ -139,14 +139,14 @@ static int subpage_walk_pmd_entry(pmd_t *pmd, unsigned long addr,
	return 0;
	return 0;
}
}


/*
 * Page-walk callbacks used by subpage_mark_vma_nohuge() via
 * walk_page_vma(): subpage_walk_pmd_entry() is installed as the
 * pmd_entry handler.  No private data is passed with this table.
 */
static const struct mm_walk_ops subpage_walk_ops = {
	.pmd_entry	= subpage_walk_pmd_entry,
};

static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr,
static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr,
				    unsigned long len)
				    unsigned long len)
{
{
	struct vm_area_struct *vma;
	struct vm_area_struct *vma;
	struct mm_walk subpage_proto_walk = {
		.mm = mm,
		.pmd_entry = subpage_walk_pmd_entry,
	};


	/*
	/*
	 * We don't try too hard, we just mark all the vma in that range
	 * We don't try too hard, we just mark all the vma in that range
@@ -163,7 +163,7 @@ static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr,
		if (vma->vm_start >= (addr + len))
		if (vma->vm_start >= (addr + len))
			break;
			break;
		vma->vm_flags |= VM_NOHUGEPAGE;
		vma->vm_flags |= VM_NOHUGEPAGE;
		walk_page_vma(vma, &subpage_proto_walk);
		walk_page_vma(vma, &subpage_walk_ops, NULL);
		vma = vma->vm_next;
		vma = vma->vm_next;
	}
	}
}
}
+15 −18
Original line number Original line Diff line number Diff line
@@ -2521,13 +2521,9 @@ static int __zap_zero_pages(pmd_t *pmd, unsigned long start,
	return 0;
	return 0;
}
}


static inline void zap_zero_pages(struct mm_struct *mm)
static const struct mm_walk_ops zap_zero_walk_ops = {
{
	.pmd_entry	= __zap_zero_pages,
	struct mm_walk walk = { .pmd_entry = __zap_zero_pages };
};

	walk.mm = mm;
	walk_page_range(0, TASK_SIZE, &walk);
}


/*
/*
 * switch on pgstes for its userspace process (for kvm)
 * switch on pgstes for its userspace process (for kvm)
@@ -2546,7 +2542,7 @@ int s390_enable_sie(void)
	mm->context.has_pgste = 1;
	mm->context.has_pgste = 1;
	/* split thp mappings and disable thp for future mappings */
	/* split thp mappings and disable thp for future mappings */
	thp_split_mm(mm);
	thp_split_mm(mm);
	zap_zero_pages(mm);
	walk_page_range(mm, 0, TASK_SIZE, &zap_zero_walk_ops, NULL);
	up_write(&mm->mmap_sem);
	up_write(&mm->mmap_sem);
	return 0;
	return 0;
}
}
@@ -2589,12 +2585,13 @@ static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr,
	return 0;
	return 0;
}
}


int s390_enable_skey(void)
static const struct mm_walk_ops enable_skey_walk_ops = {
{
	struct mm_walk walk = {
	.hugetlb_entry		= __s390_enable_skey_hugetlb,
	.hugetlb_entry		= __s390_enable_skey_hugetlb,
	.pte_entry		= __s390_enable_skey_pte,
	.pte_entry		= __s390_enable_skey_pte,
};
};

int s390_enable_skey(void)
{
	struct mm_struct *mm = current->mm;
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	struct vm_area_struct *vma;
	int rc = 0;
	int rc = 0;
@@ -2614,8 +2611,7 @@ int s390_enable_skey(void)
	}
	}
	mm->def_flags &= ~VM_MERGEABLE;
	mm->def_flags &= ~VM_MERGEABLE;


	walk.mm = mm;
	walk_page_range(mm, 0, TASK_SIZE, &enable_skey_walk_ops, NULL);
	walk_page_range(0, TASK_SIZE, &walk);


out_up:
out_up:
	up_write(&mm->mmap_sem);
	up_write(&mm->mmap_sem);
@@ -2633,13 +2629,14 @@ static int __s390_reset_cmma(pte_t *pte, unsigned long addr,
	return 0;
	return 0;
}
}


/*
 * Page-walk callbacks used by s390_reset_cmma(): __s390_reset_cmma() is
 * installed as the pte_entry handler for a walk over [0, TASK_SIZE) of
 * the given mm, performed with mmap_sem held for writing.
 */
static const struct mm_walk_ops reset_cmma_walk_ops = {
	.pte_entry		= __s390_reset_cmma,
};

void s390_reset_cmma(struct mm_struct *mm)
void s390_reset_cmma(struct mm_struct *mm)
{
{
	struct mm_walk walk = { .pte_entry = __s390_reset_cmma };

	down_write(&mm->mmap_sem);
	down_write(&mm->mmap_sem);
	walk.mm = mm;
	walk_page_range(mm, 0, TASK_SIZE, &reset_cmma_walk_ops, NULL);
	walk_page_range(0, TASK_SIZE, &walk);
	up_write(&mm->mmap_sem);
	up_write(&mm->mmap_sem);
}
}
EXPORT_SYMBOL_GPL(s390_reset_cmma);
EXPORT_SYMBOL_GPL(s390_reset_cmma);
+41 −37
Original line number Original line Diff line number Diff line
@@ -513,7 +513,9 @@ static int smaps_pte_hole(unsigned long addr, unsigned long end,


	return 0;
	return 0;
}
}
#endif
#else
#define smaps_pte_hole		NULL
#endif /* CONFIG_SHMEM */


static void smaps_pte_entry(pte_t *pte, unsigned long addr,
static void smaps_pte_entry(pte_t *pte, unsigned long addr,
		struct mm_walk *walk)
		struct mm_walk *walk)
@@ -729,21 +731,24 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
	}
	}
	return 0;
	return 0;
}
}
#else
#define smaps_hugetlb_range	NULL
#endif /* HUGETLB_PAGE */
#endif /* HUGETLB_PAGE */


static void smap_gather_stats(struct vm_area_struct *vma,
static const struct mm_walk_ops smaps_walk_ops = {
			     struct mem_size_stats *mss)
{
	struct mm_walk smaps_walk = {
	.pmd_entry		= smaps_pte_range,
	.pmd_entry		= smaps_pte_range,
#ifdef CONFIG_HUGETLB_PAGE
	.hugetlb_entry		= smaps_hugetlb_range,
	.hugetlb_entry		= smaps_hugetlb_range,
#endif
		.mm = vma->vm_mm,
};
};


	smaps_walk.private = mss;
/*
 * Variant of the smaps walk callbacks that additionally installs
 * smaps_pte_hole() as the pte_hole handler.  smap_gather_stats() selects
 * this table on the path where it sets mss->check_shmem_swap to true.
 * smaps_pte_hole and smaps_hugetlb_range are #defined to NULL in the
 * #else branches of their respective config guards, so those callbacks
 * are simply unset in such builds.
 */
static const struct mm_walk_ops smaps_shmem_walk_ops = {
	.pmd_entry		= smaps_pte_range,
	.hugetlb_entry		= smaps_hugetlb_range,
	.pte_hole		= smaps_pte_hole,
};


static void smap_gather_stats(struct vm_area_struct *vma,
			     struct mem_size_stats *mss)
{
#ifdef CONFIG_SHMEM
#ifdef CONFIG_SHMEM
	/* In case of smaps_rollup, reset the value from previous vma */
	/* In case of smaps_rollup, reset the value from previous vma */
	mss->check_shmem_swap = false;
	mss->check_shmem_swap = false;
@@ -765,12 +770,13 @@ static void smap_gather_stats(struct vm_area_struct *vma,
			mss->swap += shmem_swapped;
			mss->swap += shmem_swapped;
		} else {
		} else {
			mss->check_shmem_swap = true;
			mss->check_shmem_swap = true;
			smaps_walk.pte_hole = smaps_pte_hole;
			walk_page_vma(vma, &smaps_shmem_walk_ops, mss);
			return;
		}
		}
	}
	}
#endif
#endif
	/* mmap_sem is held in m_start */
	/* mmap_sem is held in m_start */
	walk_page_vma(vma, &smaps_walk);
	walk_page_vma(vma, &smaps_walk_ops, mss);
}
}


#define SEQ_PUT_DEC(str, val) \
#define SEQ_PUT_DEC(str, val) \
@@ -1118,6 +1124,11 @@ static int clear_refs_test_walk(unsigned long start, unsigned long end,
	return 0;
	return 0;
}
}


/*
 * Page-walk callbacks used by clear_refs_write(): clear_refs_pte_range()
 * is the pmd_entry handler and clear_refs_test_walk() is the test_walk
 * hook that decides whether the current vma is walked.  The per-call
 * struct clear_refs_private is passed as the walk's private data at the
 * call site rather than being stored here.
 */
static const struct mm_walk_ops clear_refs_walk_ops = {
	.pmd_entry		= clear_refs_pte_range,
	.test_walk		= clear_refs_test_walk,
};

static ssize_t clear_refs_write(struct file *file, const char __user *buf,
static ssize_t clear_refs_write(struct file *file, const char __user *buf,
				size_t count, loff_t *ppos)
				size_t count, loff_t *ppos)
{
{
@@ -1151,12 +1162,6 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
		struct clear_refs_private cp = {
		struct clear_refs_private cp = {
			.type = type,
			.type = type,
		};
		};
		struct mm_walk clear_refs_walk = {
			.pmd_entry = clear_refs_pte_range,
			.test_walk = clear_refs_test_walk,
			.mm = mm,
			.private = &cp,
		};


		if (type == CLEAR_REFS_MM_HIWATER_RSS) {
		if (type == CLEAR_REFS_MM_HIWATER_RSS) {
			if (down_write_killable(&mm->mmap_sem)) {
			if (down_write_killable(&mm->mmap_sem)) {
@@ -1217,7 +1222,8 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
						0, NULL, mm, 0, -1UL);
						0, NULL, mm, 0, -1UL);
			mmu_notifier_invalidate_range_start(&range);
			mmu_notifier_invalidate_range_start(&range);
		}
		}
		walk_page_range(0, mm->highest_vm_end, &clear_refs_walk);
		walk_page_range(mm, 0, mm->highest_vm_end, &clear_refs_walk_ops,
				&cp);
		if (type == CLEAR_REFS_SOFT_DIRTY)
		if (type == CLEAR_REFS_SOFT_DIRTY)
			mmu_notifier_invalidate_range_end(&range);
			mmu_notifier_invalidate_range_end(&range);
		tlb_finish_mmu(&tlb, 0, -1);
		tlb_finish_mmu(&tlb, 0, -1);
@@ -1489,8 +1495,16 @@ static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask,


	return err;
	return err;
}
}
#else
#define pagemap_hugetlb_range	NULL
#endif /* HUGETLB_PAGE */
#endif /* HUGETLB_PAGE */


/*
 * Page-walk callbacks used by pagemap_read(), which passes its
 * struct pagemapread as the walk's private data.  pagemap_hugetlb_range
 * is #defined to NULL in the #else branch of the hugetlb guard above, so
 * the hugetlb_entry callback is unset in builds without it.
 */
static const struct mm_walk_ops pagemap_ops = {
	.pmd_entry	= pagemap_pmd_range,
	.pte_hole	= pagemap_pte_hole,
	.hugetlb_entry	= pagemap_hugetlb_range,
};

/*
/*
 * /proc/pid/pagemap - an array mapping virtual pages to pfns
 * /proc/pid/pagemap - an array mapping virtual pages to pfns
 *
 *
@@ -1522,7 +1536,6 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
{
{
	struct mm_struct *mm = file->private_data;
	struct mm_struct *mm = file->private_data;
	struct pagemapread pm;
	struct pagemapread pm;
	struct mm_walk pagemap_walk = {};
	unsigned long src;
	unsigned long src;
	unsigned long svpfn;
	unsigned long svpfn;
	unsigned long start_vaddr;
	unsigned long start_vaddr;
@@ -1550,14 +1563,6 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
	if (!pm.buffer)
	if (!pm.buffer)
		goto out_mm;
		goto out_mm;


	pagemap_walk.pmd_entry = pagemap_pmd_range;
	pagemap_walk.pte_hole = pagemap_pte_hole;
#ifdef CONFIG_HUGETLB_PAGE
	pagemap_walk.hugetlb_entry = pagemap_hugetlb_range;
#endif
	pagemap_walk.mm = mm;
	pagemap_walk.private = &pm;

	src = *ppos;
	src = *ppos;
	svpfn = src / PM_ENTRY_BYTES;
	svpfn = src / PM_ENTRY_BYTES;
	start_vaddr = svpfn << PAGE_SHIFT;
	start_vaddr = svpfn << PAGE_SHIFT;
@@ -1586,7 +1591,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
		ret = down_read_killable(&mm->mmap_sem);
		ret = down_read_killable(&mm->mmap_sem);
		if (ret)
		if (ret)
			goto out_free;
			goto out_free;
		ret = walk_page_range(start_vaddr, end, &pagemap_walk);
		ret = walk_page_range(mm, start_vaddr, end, &pagemap_ops, &pm);
		up_read(&mm->mmap_sem);
		up_read(&mm->mmap_sem);
		start_vaddr = end;
		start_vaddr = end;


@@ -1798,6 +1803,11 @@ static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
}
}
#endif
#endif


/*
 * Page-walk callbacks used by show_numa_map() via walk_page_vma(), with
 * the struct numa_maps accumulator passed as the walk's private data:
 * gather_hugetlb_stats() handles hugetlb entries and gather_pte_stats()
 * is the pmd_entry handler.
 */
static const struct mm_walk_ops show_numa_ops = {
	.hugetlb_entry = gather_hugetlb_stats,
	.pmd_entry = gather_pte_stats,
};

/*
/*
 * Display pages allocated per node and memory policy via /proc.
 * Display pages allocated per node and memory policy via /proc.
 */
 */
@@ -1809,12 +1819,6 @@ static int show_numa_map(struct seq_file *m, void *v)
	struct numa_maps *md = &numa_priv->md;
	struct numa_maps *md = &numa_priv->md;
	struct file *file = vma->vm_file;
	struct file *file = vma->vm_file;
	struct mm_struct *mm = vma->vm_mm;
	struct mm_struct *mm = vma->vm_mm;
	struct mm_walk walk = {
		.hugetlb_entry = gather_hugetlb_stats,
		.pmd_entry = gather_pte_stats,
		.private = md,
		.mm = mm,
	};
	struct mempolicy *pol;
	struct mempolicy *pol;
	char buffer[64];
	char buffer[64];
	int nid;
	int nid;
@@ -1848,7 +1852,7 @@ static int show_numa_map(struct seq_file *m, void *v)
		seq_puts(m, " huge");
		seq_puts(m, " huge");


	/* mmap_sem is held by m_start */
	/* mmap_sem is held by m_start */
	walk_page_vma(vma, &walk);
	walk_page_vma(vma, &show_numa_ops, md);


	if (!md->pages)
	if (!md->pages)
		goto out;
		goto out;
+38 −26
Original line number Original line Diff line number Diff line
@@ -4,8 +4,10 @@


#include <linux/mm.h>
#include <linux/mm.h>


struct mm_walk;

/**
/**
 * mm_walk - callbacks for walk_page_range
 * mm_walk_ops - callbacks for walk_page_range
 * @pud_entry:		if set, called for each non-empty PUD (2nd-level) entry
 * @pud_entry:		if set, called for each non-empty PUD (2nd-level) entry
 *			this handler should only handle pud_trans_huge() puds.
 *			this handler should only handle pud_trans_huge() puds.
 *			the pmd_entry or pte_entry callbacks will be used for
 *			the pmd_entry or pte_entry callbacks will be used for
@@ -18,17 +20,12 @@
 * @pte_hole:		if set, called for each hole at all levels
 * @pte_hole:		if set, called for each hole at all levels
 * @hugetlb_entry:	if set, called for each hugetlb entry
 * @hugetlb_entry:	if set, called for each hugetlb entry
 * @test_walk:		caller specific callback function to determine whether
 * @test_walk:		caller specific callback function to determine whether
 *             we walk over the current vma or not. Returning 0
 *			we walk over the current vma or not. Returning 0 means
 *             value means "do page table walk over the current vma,"
 *			"do page table walk over the current vma", returning
 *             and a negative one means "abort current page table walk
 *			a negative value means "abort current page table walk
 *             right now." 1 means "skip the current vma."
 *			right now" and returning 1 means "skip the current vma"
 * @mm:        mm_struct representing the target process of page table walk
 * @vma:       vma currently walked (NULL if walking outside vmas)
 * @private:   private data for callbacks' usage
 *
 * (see the comment on walk_page_range() for more details)
 */
 */
struct mm_walk {
struct mm_walk_ops {
	int (*pud_entry)(pud_t *pud, unsigned long addr,
	int (*pud_entry)(pud_t *pud, unsigned long addr,
			 unsigned long next, struct mm_walk *walk);
			 unsigned long next, struct mm_walk *walk);
	int (*pmd_entry)(pmd_t *pmd, unsigned long addr,
	int (*pmd_entry)(pmd_t *pmd, unsigned long addr,
@@ -42,13 +39,28 @@ struct mm_walk {
			     struct mm_walk *walk);
			     struct mm_walk *walk);
	int (*test_walk)(unsigned long addr, unsigned long next,
	int (*test_walk)(unsigned long addr, unsigned long next,
			struct mm_walk *walk);
			struct mm_walk *walk);
};

/**
 * mm_walk - walk_page_range data
 * @ops:	operation to call during the walk
 * @mm:		mm_struct representing the target process of page table walk
 * @vma:	vma currently walked (NULL if walking outside vmas)
 * @private:	private data for callbacks' usage
 *
 * (see the comment on walk_page_range() for more details)
 */
struct mm_walk {
	const struct mm_walk_ops *ops;
	struct mm_struct *mm;
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	struct vm_area_struct *vma;
	void *private;
	void *private;
};
};


int walk_page_range(unsigned long addr, unsigned long end,
int walk_page_range(struct mm_struct *mm, unsigned long start,
		struct mm_walk *walk);
		unsigned long end, const struct mm_walk_ops *ops,
int walk_page_vma(struct vm_area_struct *vma, struct mm_walk *walk);
		void *private);
int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops,
		void *private);


#endif /* _LINUX_PAGEWALK_H */
#endif /* _LINUX_PAGEWALK_H */
Loading