
Commit 9532fec1 authored by Mel Gorman

mm: numa: Migrate pages handled during a pmd_numa hinting fault



To say that the PMD handling code was incorrectly transferred from autonuma
is an understatement. The intention was to handle a PMD's worth of pages
in the same fault and effectively batch the taking of the PTL and the page
migration. The copied version instead merely clears a number of pte_numa
PTE entries, and whether any page migration takes place is left to a race;
this just happens to work in some cases.

This patch handles pte_numa faults in batch when a pmd_numa fault is
handled, and the pages are migrated if they are currently misplaced.
Essentially this assumes that NUMA locality is aligned to a PMD
boundary; if necessary, that could be addressed by setting pmd_numa
only when all the pages within that PMD are on the same node.
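
To make the batching idea concrete, here is a minimal userspace sketch, not the kernel fault path: it treats one PMD-sized region (2MB of 4KB pages on x86-64) as a unit and migrates every page in it with a single move_pages(2) call, rather than deciding page by page. The region size, the target-node choice, and the error handling are illustrative assumptions; it requires libnuma (build with -lnuma).

#define _GNU_SOURCE
#include <numa.h>
#include <numaif.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#define PMD_SIZE (2UL << 20)	/* one PMD maps 2MB of 4KB pages on x86-64 */

int main(void)
{
	if (numa_available() < 0) {
		fprintf(stderr, "NUMA is not available\n");
		return 1;
	}

	long page_size = sysconf(_SC_PAGESIZE);
	unsigned long nr_pages = PMD_SIZE / page_size;

	/* Back the region with pages on node 0 so there is something to move. */
	char *region = numa_alloc_onnode(PMD_SIZE, 0);
	if (!region)
		return 1;

	void **pages = calloc(nr_pages, sizeof(*pages));
	int *nodes = calloc(nr_pages, sizeof(*nodes));
	int *status = calloc(nr_pages, sizeof(*status));
	int target = numa_node_of_cpu(sched_getcpu());	/* the "local" node */

	for (unsigned long i = 0; i < nr_pages; i++) {
		region[i * page_size] = 1;	/* fault each page in */
		pages[i] = region + i * page_size;
		nodes[i] = target;
	}

	/* One call migrates the whole PMD's worth of pages in batch. */
	if (move_pages(0, nr_pages, pages, nodes, status, MPOL_MF_MOVE) < 0)
		perror("move_pages");
	else
		printf("first page is now on node %d\n", status[0]);

	numa_free(region, PMD_SIZE);
	free(pages); free(nodes); free(status);
	return 0;
}

In the patch itself the same batching happens at fault time: do_pmd_numa_page() walks every pte_numa entry under the faulting PMD, calls numa_migrate_prep() to decide placement, and migrates misplaced pages via migrate_misplaced_page(), instead of merely clearing the pte_numa bits.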

Signed-off-by: Mel Gorman <mgorman@suse.de>
parent 5606e387
mm/memory.c  +34 −17
@@ -3449,6 +3449,18 @@ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	return __do_fault(mm, vma, address, pmd, pgoff, flags, orig_pte);
 }
 
+int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
+				unsigned long addr, int current_nid)
+{
+	get_page(page);
+
+	count_vm_numa_event(NUMA_HINT_FAULTS);
+	if (current_nid == numa_node_id())
+		count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
+
+	return mpol_misplaced(page, vma, addr);
+}
+
 int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		   unsigned long addr, pte_t pte, pte_t *ptep, pmd_t *pmd)
 {
@@ -3477,18 +3489,14 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	set_pte_at(mm, addr, ptep, pte);
 	update_mmu_cache(vma, addr, ptep);
 
-	count_vm_numa_event(NUMA_HINT_FAULTS);
 	page = vm_normal_page(vma, addr, pte);
 	if (!page) {
 		pte_unmap_unlock(ptep, ptl);
 		return 0;
 	}
 
-	get_page(page);
 	current_nid = page_to_nid(page);
-	if (current_nid == numa_node_id())
-		count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
-	target_nid = mpol_misplaced(page, vma, addr);
+	target_nid = numa_migrate_prep(page, vma, addr, current_nid);
 	pte_unmap_unlock(ptep, ptl);
 	if (target_nid == -1) {
 		/*
@@ -3505,6 +3513,7 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		current_nid = target_nid;
 
 out:
-	task_numa_fault(current_nid, 1);
+	if (current_nid != -1)
+		task_numa_fault(current_nid, 1);
 	return 0;
 }
@@ -3521,8 +3530,6 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	spinlock_t *ptl;
 	bool numa = false;
 	int local_nid = numa_node_id();
-	unsigned long nr_faults = 0;
-	unsigned long nr_faults_local = 0;
 
 	spin_lock(&mm->page_table_lock);
 	pmd = *pmdp;
@@ -3545,7 +3552,8 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	for (addr = _addr + offset; addr < _addr + PMD_SIZE; pte++, addr += PAGE_SIZE) {
 		pte_t pteval = *pte;
 		struct page *page;
-		int curr_nid;
+		int curr_nid = local_nid;
+		int target_nid;
 		if (!pte_present(pteval))
 			continue;
 		if (!pte_numa(pteval))
@@ -3566,21 +3574,30 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		/* only check non-shared pages */
 		if (unlikely(page_mapcount(page) != 1))
 			continue;
-		pte_unmap_unlock(pte, ptl);
 
-		curr_nid = page_to_nid(page);
-		task_numa_fault(curr_nid, 1);
+		/*
+		 * Note that the NUMA fault is later accounted to either
+		 * the node that is currently running or where the page is
+		 * migrated to.
+		 */
+		curr_nid = local_nid;
+		target_nid = numa_migrate_prep(page, vma, addr,
+					       page_to_nid(page));
+		if (target_nid == -1) {
+			put_page(page);
+			continue;
+		}
 
-		nr_faults++;
-		if (curr_nid == local_nid)
-			nr_faults_local++;
+		/* Migrate to the requested node */
+		pte_unmap_unlock(pte, ptl);
+		if (migrate_misplaced_page(page, target_nid))
+			curr_nid = target_nid;
+		task_numa_fault(curr_nid, 1);
 
 		pte = pte_offset_map_lock(mm, pmdp, addr, &ptl);
 	}
 	pte_unmap_unlock(orig_pte, ptl);
 
-	count_vm_numa_events(NUMA_HINT_FAULTS, nr_faults);
-	count_vm_numa_events(NUMA_HINT_FAULTS_LOCAL, nr_faults_local);
 	return 0;
 }
 #else
mm/mprotect.c  +20 −5
@@ -37,12 +37,14 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
 
 static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 		unsigned long addr, unsigned long end, pgprot_t newprot,
-		int dirty_accountable, int prot_numa)
+		int dirty_accountable, int prot_numa, bool *ret_all_same_node)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	pte_t *pte, oldpte;
 	spinlock_t *ptl;
 	unsigned long pages = 0;
+	bool all_same_node = true;
+	int last_nid = -1;
 
 	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
 	arch_enter_lazy_mmu_mode();
@@ -61,6 +63,12 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 
 				page = vm_normal_page(vma, addr, oldpte);
 				if (page) {
+					int this_nid = page_to_nid(page);
+					if (last_nid == -1)
+						last_nid = this_nid;
+					if (last_nid != this_nid)
+						all_same_node = false;
+
 					/* only check non-shared pages */
 					if (!pte_numa(oldpte) &&
 					    page_mapcount(page) == 1) {
@@ -81,7 +89,6 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 
 			if (updated)
 				pages++;
-
 			ptep_modify_prot_commit(mm, addr, pte, ptent);
 		} else if (IS_ENABLED(CONFIG_MIGRATION) && !pte_file(oldpte)) {
 			swp_entry_t entry = pte_to_swp_entry(oldpte);
@@ -101,6 +108,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 	arch_leave_lazy_mmu_mode();
 	pte_unmap_unlock(pte - 1, ptl);
 
+	*ret_all_same_node = all_same_node;
 	return pages;
 }
 
@@ -127,6 +135,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t *
 	pmd_t *pmd;
 	unsigned long next;
 	unsigned long pages = 0;
+	bool all_same_node;
 
 	pmd = pmd_offset(pud, addr);
 	do {
@@ -143,9 +152,15 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t *
 		if (pmd_none_or_clear_bad(pmd))
 			continue;
 		pages += change_pte_range(vma, pmd, addr, next, newprot,
-				 dirty_accountable, prot_numa);
+				 dirty_accountable, prot_numa, &all_same_node);
 
-		if (prot_numa)
+		/*
+		 * If we are changing protections for NUMA hinting faults then
+		 * set pmd_numa if the examined pages were all on the same
+		 * node. This allows a regular PMD to be handled as one fault
+		 * and effectively batches the taking of the PTL
+		 */
+		if (prot_numa && all_same_node)
 			change_pmd_protnuma(vma->vm_mm, addr, pmd);
 	} while (pmd++, addr = next, addr != end);