
Commit b20ce5e0 authored by Kirill A. Shutemov, committed by Linus Torvalds

mm: prepare page_referenced() and page_idle to new THP refcounting



Both page_referenced() and page_idle_clear_pte_refs_one() assume that
THP can only be mapped with a PMD, so there's no reason to look at PTEs
for PageTransHuge() pages.  That's not true anymore: THP can be mapped
with PTEs too.

The patch removes the PageTransHuge() test from the functions and
open-codes the page table check.
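
For reference, the open-coded check both call sites now perform is the
standard page-table walk; a minimal reader's sketch of the pattern,
assuming the four-level pgd/pud/pmd layout in use at the time (the
helper name is hypothetical, not part of the patch):

	/* Sketch of the walk the patch open-codes in both callers;
	 * returns the pmd entry, which the caller must still validate
	 * under pmd_lock() or pte_lockptr().
	 */
	static pmd_t *sketch_find_pmd(struct mm_struct *mm, unsigned long address)
	{
		pgd_t *pgd;
		pud_t *pud;

		pgd = pgd_offset(mm, address);
		if (!pgd_present(*pgd))
			return NULL;
		pud = pud_offset(pgd, address);
		if (!pud_present(*pud))
			return NULL;
		return pmd_offset(pud, address);
	}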

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Vladimir Davydov <vdavydov@parallels.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Sasha Levin <sasha.levin@oracle.com>
Cc: Minchan Kim <minchan@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent e90309c9
include/linux/huge_mm.h  +0 −5
@@ -48,11 +48,6 @@ enum transparent_hugepage_flag {
 #endif
 };
 
-extern pmd_t *page_check_address_pmd(struct page *page,
-				     struct mm_struct *mm,
-				     unsigned long address,
-				     spinlock_t **ptl);
-
 #define HPAGE_PMD_ORDER (HPAGE_PMD_SHIFT-PAGE_SHIFT)
 #define HPAGE_PMD_NR (1<<HPAGE_PMD_ORDER)

include/linux/mm.h  +14 −9
@@ -433,20 +433,25 @@ static inline void page_mapcount_reset(struct page *page)
 	atomic_set(&(page)->_mapcount, -1);
 }
 
+int __page_mapcount(struct page *page);
+
 static inline int page_mapcount(struct page *page)
 {
-	int ret;
 	VM_BUG_ON_PAGE(PageSlab(page), page);
 
-	ret = atomic_read(&page->_mapcount) + 1;
-	if (PageCompound(page)) {
-		page = compound_head(page);
-		ret += atomic_read(compound_mapcount_ptr(page)) + 1;
-		if (PageDoubleMap(page))
-			ret--;
-	}
-	return ret;
+	if (unlikely(PageCompound(page)))
+		return __page_mapcount(page);
+	return atomic_read(&page->_mapcount) + 1;
 }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+int total_mapcount(struct page *page);
+#else
+static inline int total_mapcount(struct page *page)
+{
+	return page_mapcount(page);
+}
+#endif
+
 static inline int page_count(struct page *page)
 {
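
The out-of-line __page_mapcount() itself does not appear in this
excerpt (presumably it is added in mm/util.c by the same commit); a
sketch reconstructed from the inline code removed above, which may
differ in detail from the actual helper:

	/* Reader's reconstruction of the out-of-line slow path for
	 * compound pages; not copied from the patch.
	 */
	int __page_mapcount(struct page *page)
	{
		int ret;

		ret = atomic_read(&page->_mapcount) + 1;
		page = compound_head(page);
		ret += atomic_read(compound_mapcount_ptr(page)) + 1;
		if (PageDoubleMap(page))
			ret--;
		return ret;
	}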
mm/huge_memory.c  +19 −54
@@ -1649,46 +1649,6 @@ bool __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
 	return false;
 }
 
-/*
- * This function returns whether a given @page is mapped onto the @address
- * in the virtual space of @mm.
- *
- * When it's true, this function returns *pmd with holding the page table lock
- * and passing it back to the caller via @ptl.
- * If it's false, returns NULL without holding the page table lock.
- */
-pmd_t *page_check_address_pmd(struct page *page,
-			      struct mm_struct *mm,
-			      unsigned long address,
-			      spinlock_t **ptl)
-{
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-
-	if (address & ~HPAGE_PMD_MASK)
-		return NULL;
-
-	pgd = pgd_offset(mm, address);
-	if (!pgd_present(*pgd))
-		return NULL;
-	pud = pud_offset(pgd, address);
-	if (!pud_present(*pud))
-		return NULL;
-	pmd = pmd_offset(pud, address);
-
-	*ptl = pmd_lock(mm, pmd);
-	if (!pmd_present(*pmd))
-		goto unlock;
-	if (pmd_page(*pmd) != page)
-		goto unlock;
-	if (pmd_trans_huge(*pmd))
-		return pmd;
-unlock:
-	spin_unlock(*ptl);
-	return NULL;
-}
-
 #define VM_NO_THP (VM_SPECIAL | VM_HUGETLB | VM_SHARED | VM_MAYSHARE)
 
 int hugepage_madvise(struct vm_area_struct *vma,
@@ -3097,20 +3057,6 @@ static void unfreeze_page(struct anon_vma *anon_vma, struct page *page)
 	}
 }
 
-static int total_mapcount(struct page *page)
-{
-	int i, ret;
-
-	ret = compound_mapcount(page);
-	for (i = 0; i < HPAGE_PMD_NR; i++)
-		ret += atomic_read(&page[i]._mapcount) + 1;
-
-	if (PageDoubleMap(page))
-		ret -= HPAGE_PMD_NR;
-
-	return ret;
-}
-
 static int __split_huge_page_tail(struct page *head, int tail,
 		struct lruvec *lruvec, struct list_head *list)
 {
@@ -3211,6 +3157,25 @@ static void __split_huge_page(struct page *page, struct list_head *list)
 	}
 }
 
+int total_mapcount(struct page *page)
+{
+	int i, ret;
+
+	VM_BUG_ON_PAGE(PageTail(page), page);
+
+	if (likely(!PageCompound(page)))
+		return atomic_read(&page->_mapcount) + 1;
+
+	ret = compound_mapcount(page);
+	if (PageHuge(page))
+		return ret;
+	for (i = 0; i < HPAGE_PMD_NR; i++)
+		ret += atomic_read(&page[i]._mapcount) + 1;
+	if (PageDoubleMap(page))
+		ret -= HPAGE_PMD_NR;
+	return ret;
+}
+
 /*
  * This function splits huge page into normal pages. @page can point to any
 * subpage of huge page to split. Split doesn't change the position of @page.
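
The accounting in the new total_mapcount() is easiest to check with a
worked example; the numbers below are hypothetical, for a THP mapped
once with a PMD and once with PTEs, reading the code as shown above:

	/*
	 * Hypothetical example, HPAGE_PMD_NR == 512:
	 *   compound_mapcount(page)             == 1  (the PMD mapping)
	 *   atomic_read(&page[i]._mapcount) + 1 == 2  per subpage
	 *       (one PTE mapping plus the PageDoubleMap offset)
	 *   PageDoubleMap(page) set -> subtract HPAGE_PMD_NR (512)
	 *
	 *   total = 1 + 512 * 2 - 512 = 513
	 *
	 * i.e. one PMD mapping plus 512 PTE mappings.
	 */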
mm/page_idle.c  +56 −9
@@ -56,23 +56,70 @@ static int page_idle_clear_pte_refs_one(struct page *page,
 {
 	struct mm_struct *mm = vma->vm_mm;
 	spinlock_t *ptl;
+	pgd_t *pgd;
+	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
 	bool referenced = false;
 
-	if (unlikely(PageTransHuge(page))) {
-		pmd = page_check_address_pmd(page, mm, addr, &ptl);
-		if (pmd) {
-			referenced = pmdp_clear_young_notify(vma, addr, pmd);
+	pgd = pgd_offset(mm, addr);
+	if (!pgd_present(*pgd))
+		return SWAP_AGAIN;
+	pud = pud_offset(pgd, addr);
+	if (!pud_present(*pud))
+		return SWAP_AGAIN;
+	pmd = pmd_offset(pud, addr);
+
+	if (pmd_trans_huge(*pmd)) {
+		ptl = pmd_lock(mm, pmd);
+		if (!pmd_present(*pmd))
+			goto unlock_pmd;
+		if (unlikely(!pmd_trans_huge(*pmd))) {
 			spin_unlock(ptl);
+			goto map_pte;
 		}
+
+		if (pmd_page(*pmd) != page)
+			goto unlock_pmd;
+
+		referenced = pmdp_clear_young_notify(vma, addr, pmd);
+		spin_unlock(ptl);
+		goto found;
+unlock_pmd:
+		spin_unlock(ptl);
+		return SWAP_AGAIN;
 	} else {
-		pte = page_check_address(page, mm, addr, &ptl, 0);
-		if (pte) {
-			referenced = ptep_clear_young_notify(vma, addr, pte);
-			pte_unmap_unlock(pte, ptl);
-		}
+		pmd_t pmde = *pmd;
+
+		barrier();
+		if (!pmd_present(pmde) || pmd_trans_huge(pmde))
+			return SWAP_AGAIN;
+
+	}
+map_pte:
+	pte = pte_offset_map(pmd, addr);
+	if (!pte_present(*pte)) {
+		pte_unmap(pte);
+		return SWAP_AGAIN;
+	}
+
+	ptl = pte_lockptr(mm, pmd);
+	spin_lock(ptl);
+
+	if (!pte_present(*pte)) {
+		pte_unmap_unlock(pte, ptl);
+		return SWAP_AGAIN;
 	}
+
+	/* THP can be referenced by any subpage */
+	if (pte_pfn(*pte) - page_to_pfn(page) >= hpage_nr_pages(page)) {
+		pte_unmap_unlock(pte, ptl);
+		return SWAP_AGAIN;
+	}
+
+	referenced = ptep_clear_young_notify(vma, addr, pte);
+	pte_unmap_unlock(pte, ptl);
+found:
 	if (referenced) {
 		clear_page_idle(page);
 		/*
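
Both rewritten walkers follow the same two-step discipline: test
pmd_trans_huge() locklessly, then revalidate under pmd_lock() before
acting, falling back to the PTE path if the huge page was split in
between; the else branch snapshots the pmd once and uses barrier() so
the compiler cannot re-read *pmd between the checks. In outline, as a
reader's distillation rather than the patch text:

	/* Distilled recheck pattern, not copied from the patch. */
	if (pmd_trans_huge(*pmd)) {		/* optimistic, unlocked */
		ptl = pmd_lock(mm, pmd);	/* lock, then re-test: */
		if (unlikely(!pmd_trans_huge(*pmd))) {	/* split raced us */
			spin_unlock(ptl);
			goto map_pte;		/* treat as PTE-mapped */
		}
		/* *pmd is a stable huge entry while ptl is held */
	} else {
		pmd_t pmde = *pmd;		/* one snapshot */

		barrier();		/* no compiler re-reads of *pmd */
		if (!pmd_present(pmde) || pmd_trans_huge(pmde))
			return SWAP_AGAIN;	/* raced; walk continues */
	}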
mm/rmap.c  +82 −35
@@ -814,37 +814,84 @@ static int page_referenced_one(struct page *page, struct vm_area_struct *vma,
 	spinlock_t *ptl;
 	int referenced = 0;
 	struct page_referenced_arg *pra = arg;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
 
-	if (unlikely(PageTransHuge(page))) {
-		pmd_t *pmd;
-
-		/*
-		 * rmap might return false positives; we must filter
-		 * these out using page_check_address_pmd().
-		 */
-		pmd = page_check_address_pmd(page, mm, address, &ptl);
-		if (!pmd)
+	if (unlikely(PageHuge(page))) {
+		/* when pud is not present, pte will be NULL */
+		pte = huge_pte_offset(mm, address);
+		if (!pte)
 			return SWAP_AGAIN;
 
-		if (vma->vm_flags & VM_LOCKED) {
-			spin_unlock(ptl);
-			pra->vm_flags |= VM_LOCKED;
-			return SWAP_FAIL; /* To break the loop */
+		ptl = huge_pte_lockptr(page_hstate(page), mm, pte);
+		goto check_pte;
+	}
+
+	pgd = pgd_offset(mm, address);
+	if (!pgd_present(*pgd))
+		return SWAP_AGAIN;
+	pud = pud_offset(pgd, address);
+	if (!pud_present(*pud))
+		return SWAP_AGAIN;
+	pmd = pmd_offset(pud, address);
+
+	if (pmd_trans_huge(*pmd)) {
+		int ret = SWAP_AGAIN;
+
+		ptl = pmd_lock(mm, pmd);
+		if (!pmd_present(*pmd))
+			goto unlock_pmd;
+		if (unlikely(!pmd_trans_huge(*pmd))) {
+			spin_unlock(ptl);
+			goto map_pte;
+		}
+
+		if (pmd_page(*pmd) != page)
+			goto unlock_pmd;
+
+		if (vma->vm_flags & VM_LOCKED) {
+			pra->vm_flags |= VM_LOCKED;
+			ret = SWAP_FAIL; /* To break the loop */
+			goto unlock_pmd;
 		}
 
 		if (pmdp_clear_flush_young_notify(vma, address, pmd))
 			referenced++;
 		spin_unlock(ptl);
+		goto found;
+unlock_pmd:
+		spin_unlock(ptl);
+		return ret;
 	} else {
-		pte_t *pte;
+		pmd_t pmde = *pmd;
 
-		/*
-		 * rmap might return false positives; we must filter
-		 * these out using page_check_address().
-		 */
-		pte = page_check_address(page, mm, address, &ptl, 0);
-		if (!pte)
+		barrier();
+		if (!pmd_present(pmde) || pmd_trans_huge(pmde))
 			return SWAP_AGAIN;
+	}
+map_pte:
+	pte = pte_offset_map(pmd, address);
+	if (!pte_present(*pte)) {
+		pte_unmap(pte);
+		return SWAP_AGAIN;
+	}
+
+	ptl = pte_lockptr(mm, pmd);
+check_pte:
+	spin_lock(ptl);
+
+	if (!pte_present(*pte)) {
+		pte_unmap_unlock(pte, ptl);
+		return SWAP_AGAIN;
+	}
+
+	/* THP can be referenced by any subpage */
+	if (pte_pfn(*pte) - page_to_pfn(page) >= hpage_nr_pages(page)) {
+		pte_unmap_unlock(pte, ptl);
+		return SWAP_AGAIN;
+	}
 
 	if (vma->vm_flags & VM_LOCKED) {
 		pte_unmap_unlock(pte, ptl);
@@ -864,8 +911,8 @@ static int page_referenced_one(struct page *page, struct vm_area_struct *vma,
 			referenced++;
 	}
 	pte_unmap_unlock(pte, ptl);
-	}
 
+found:
 	if (referenced)
 		clear_page_idle(page);
 	if (test_and_clear_page_young(page))
@@ -912,7 +959,7 @@ int page_referenced(struct page *page,
 	int ret;
 	int we_locked = 0;
 	struct page_referenced_arg pra = {
-		.mapcount = page_mapcount(page),
+		.mapcount = total_mapcount(page),
 		.memcg = memcg,
 	};
 	struct rmap_walk_control rwc = {
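
The final hunk seeds the walk counter with total_mapcount() instead of
page_mapcount(): pra.mapcount is decremented once per mapping the rmap
walk processes, and a PTE-mapped THP can now contribute many more
mappings than the head page's own mapcount reports. A reading of the
intent, with hypothetical numbers:

	/*
	 * Hypothetical: a THP is PMD-mapped in process A and PTE-mapped
	 * (512 PTEs) in process B.  page_mapcount() of the head page
	 * sees only the head's own mappings, but the rmap walk will
	 * find 513.  Seeding with total_mapcount() keeps
	 * page_referenced() from stopping before every mapping has
	 * been examined.
	 */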
[One further file diff did not load in this capture; presumably mm/util.c, which gains the __page_mapcount() definition declared in include/linux/mm.h above.]