Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit bf929152 authored by Kirill A. Shutemov's avatar Kirill A. Shutemov Committed by Linus Torvalds
Browse files

mm, thp: change pmd_trans_huge_lock() to return taken lock



With split ptlock it's important to know which lock
pmd_trans_huge_lock() took.  This patch adds one more parameter to the
function to return the lock.

In most places migration to new api is trivial.  Exception is
move_huge_pmd(): we need to take two locks if pmd tables are different.

Signed-off-by: default avatarNaoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: default avatarKirill A. Shutemov <kirill.shutemov@linux.intel.com>
Tested-by: default avatarAlex Thorlton <athorlton@sgi.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "Eric W . Biederman" <ebiederm@xmission.com>
Cc: "Paul E . McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Dave Jones <davej@redhat.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Robin Holt <robinmholt@gmail.com>
Cc: Sedat Dilek <sedat.dilek@gmail.com>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent 9a86cb7b
Loading
Loading
Loading
Loading
+7 −6
Original line number Diff line number Diff line
@@ -506,9 +506,9 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
	pte_t *pte;
	spinlock_t *ptl;

	if (pmd_trans_huge_lock(pmd, vma) == 1) {
	if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
		smaps_pte_entry(*(pte_t *)pmd, addr, HPAGE_PMD_SIZE, walk);
		spin_unlock(&walk->mm->page_table_lock);
		spin_unlock(ptl);
		mss->anonymous_thp += HPAGE_PMD_SIZE;
		return 0;
	}
@@ -999,13 +999,14 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
{
	struct vm_area_struct *vma;
	struct pagemapread *pm = walk->private;
	spinlock_t *ptl;
	pte_t *pte;
	int err = 0;
	pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2));

	/* find the first VMA at or above 'addr' */
	vma = find_vma(walk->mm, addr);
	if (vma && pmd_trans_huge_lock(pmd, vma) == 1) {
	if (vma && pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
		int pmd_flags2;

		if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(*pmd))
@@ -1023,7 +1024,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
			if (err)
				break;
		}
		spin_unlock(&walk->mm->page_table_lock);
		spin_unlock(ptl);
		return err;
	}

@@ -1325,7 +1326,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,

	md = walk->private;

	if (pmd_trans_huge_lock(pmd, md->vma) == 1) {
	if (pmd_trans_huge_lock(pmd, md->vma, &ptl) == 1) {
		pte_t huge_pte = *(pte_t *)pmd;
		struct page *page;

@@ -1333,7 +1334,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
		if (page)
			gather_stats(page, md, pte_dirty(huge_pte),
				     HPAGE_PMD_SIZE/PAGE_SIZE);
		spin_unlock(&walk->mm->page_table_lock);
		spin_unlock(ptl);
		return 0;
	}

+7 −7
Original line number Diff line number Diff line
@@ -129,15 +129,15 @@ extern void __vma_adjust_trans_huge(struct vm_area_struct *vma,
				    unsigned long start,
				    unsigned long end,
				    long adjust_next);
extern int __pmd_trans_huge_lock(pmd_t *pmd,
				 struct vm_area_struct *vma);
extern int __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
		spinlock_t **ptl);
/* mmap_sem must be held on entry */
static inline int pmd_trans_huge_lock(pmd_t *pmd,
				      struct vm_area_struct *vma)
static inline int pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
		spinlock_t **ptl)
{
	VM_BUG_ON(!rwsem_is_locked(&vma->vm_mm->mmap_sem));
	if (pmd_trans_huge(*pmd))
		return __pmd_trans_huge_lock(pmd, vma);
		return __pmd_trans_huge_lock(pmd, vma, ptl);
	else
		return 0;
}
@@ -215,8 +215,8 @@ static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
					 long adjust_next)
{
}
static inline int pmd_trans_huge_lock(pmd_t *pmd,
				      struct vm_area_struct *vma)
static inline int pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
		spinlock_t **ptl)
{
	return 0;
}
+27 −13
Original line number Diff line number Diff line
@@ -1376,9 +1376,10 @@ out:
int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
		 pmd_t *pmd, unsigned long addr)
{
	spinlock_t *ptl;
	int ret = 0;

	if (__pmd_trans_huge_lock(pmd, vma) == 1) {
	if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
		struct page *page;
		pgtable_t pgtable;
		pmd_t orig_pmd;
@@ -1393,7 +1394,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
		pgtable = pgtable_trans_huge_withdraw(tlb->mm, pmd);
		if (is_huge_zero_pmd(orig_pmd)) {
			atomic_long_dec(&tlb->mm->nr_ptes);
			spin_unlock(&tlb->mm->page_table_lock);
			spin_unlock(ptl);
			put_huge_zero_page();
		} else {
			page = pmd_page(orig_pmd);
@@ -1402,7 +1403,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
			add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
			VM_BUG_ON(!PageHead(page));
			atomic_long_dec(&tlb->mm->nr_ptes);
			spin_unlock(&tlb->mm->page_table_lock);
			spin_unlock(ptl);
			tlb_remove_page(tlb, page);
		}
		pte_free(tlb->mm, pgtable);
@@ -1415,14 +1416,15 @@ int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
		unsigned long addr, unsigned long end,
		unsigned char *vec)
{
	spinlock_t *ptl;
	int ret = 0;

	if (__pmd_trans_huge_lock(pmd, vma) == 1) {
	if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
		/*
		 * All logical pages in the range are present
		 * if backed by a huge page.
		 */
		spin_unlock(&vma->vm_mm->page_table_lock);
		spin_unlock(ptl);
		memset(vec, 1, (end - addr) >> PAGE_SHIFT);
		ret = 1;
	}
@@ -1435,6 +1437,7 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
		  unsigned long new_addr, unsigned long old_end,
		  pmd_t *old_pmd, pmd_t *new_pmd)
{
	spinlock_t *old_ptl, *new_ptl;
	int ret = 0;
	pmd_t pmd;

@@ -1455,12 +1458,21 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
		goto out;
	}

	ret = __pmd_trans_huge_lock(old_pmd, vma);
	/*
	 * We don't have to worry about the ordering of src and dst
	 * ptlocks because exclusive mmap_sem prevents deadlock.
	 */
	ret = __pmd_trans_huge_lock(old_pmd, vma, &old_ptl);
	if (ret == 1) {
		new_ptl = pmd_lockptr(mm, new_pmd);
		if (new_ptl != old_ptl)
			spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
		pmd = pmdp_get_and_clear(mm, old_addr, old_pmd);
		VM_BUG_ON(!pmd_none(*new_pmd));
		set_pmd_at(mm, new_addr, new_pmd, pmd_mksoft_dirty(pmd));
		spin_unlock(&mm->page_table_lock);
		if (new_ptl != old_ptl)
			spin_unlock(new_ptl);
		spin_unlock(old_ptl);
	}
out:
	return ret;
@@ -1476,9 +1488,10 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
		unsigned long addr, pgprot_t newprot, int prot_numa)
{
	struct mm_struct *mm = vma->vm_mm;
	spinlock_t *ptl;
	int ret = 0;

	if (__pmd_trans_huge_lock(pmd, vma) == 1) {
	if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
		pmd_t entry;
		ret = 1;
		if (!prot_numa) {
@@ -1507,7 +1520,7 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
		if (ret == HPAGE_PMD_NR)
			set_pmd_at(mm, addr, pmd, entry);

		spin_unlock(&vma->vm_mm->page_table_lock);
		spin_unlock(ptl);
	}

	return ret;
@@ -1520,12 +1533,13 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 * Note that if it returns 1, this routine returns without unlocking page
 * table locks. So callers must unlock them.
 */
int __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma)
int __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
		spinlock_t **ptl)
{
	spin_lock(&vma->vm_mm->page_table_lock);
	*ptl = pmd_lock(vma->vm_mm, pmd);
	if (likely(pmd_trans_huge(*pmd))) {
		if (unlikely(pmd_trans_splitting(*pmd))) {
			spin_unlock(&vma->vm_mm->page_table_lock);
			spin_unlock(*ptl);
			wait_split_huge_page(vma->anon_vma, pmd);
			return -1;
		} else {
@@ -1534,7 +1548,7 @@ int __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma)
			return 1;
		}
	}
	spin_unlock(&vma->vm_mm->page_table_lock);
	spin_unlock(*ptl);
	return 0;
}

+5 −5
Original line number Diff line number Diff line
@@ -6605,10 +6605,10 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
	pte_t *pte;
	spinlock_t *ptl;

	if (pmd_trans_huge_lock(pmd, vma) == 1) {
	if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
		if (get_mctgt_type_thp(vma, addr, *pmd, NULL) == MC_TARGET_PAGE)
			mc.precharge += HPAGE_PMD_NR;
		spin_unlock(&vma->vm_mm->page_table_lock);
		spin_unlock(ptl);
		return 0;
	}

@@ -6797,9 +6797,9 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
	 *    to be unlocked in __split_huge_page_splitting(), where the main
	 *    part of thp split is not executed yet.
	 */
	if (pmd_trans_huge_lock(pmd, vma) == 1) {
	if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
		if (mc.precharge < HPAGE_PMD_NR) {
			spin_unlock(&vma->vm_mm->page_table_lock);
			spin_unlock(ptl);
			return 0;
		}
		target_type = get_mctgt_type_thp(vma, addr, *pmd, &target);
@@ -6816,7 +6816,7 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
			}
			put_page(page);
		}
		spin_unlock(&vma->vm_mm->page_table_lock);
		spin_unlock(ptl);
		return 0;
	}