Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 657e3038 authored by Kirill A. Shutemov, committed by Linus Torvalds
Browse files

shmem, thp: respect MADV_{NO,}HUGEPAGE for file mappings

Let's wire up existing madvise() hugepage hints for file mappings.

MADV_HUGEPAGE advises shmem to allocate a huge page on page fault in the
VMA.  It only has an effect if the filesystem is mounted with huge=advise
or huge=within_size.

MADV_NOHUGEPAGE prevents a huge page from being allocated on page fault in
the VMA.  It doesn't prevent a huge page from being allocated by other
means, e.g. a page fault into a different mapping or write(2) into the file.

Link: http://lkml.kernel.org/r/1466021202-61880-31-git-send-email-kirill.shutemov@linux.intel.com


Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 800d8c63
Loading
Loading
Loading
Loading
+5 −14
Original line number Diff line number Diff line
@@ -1830,7 +1830,7 @@ spinlock_t *__pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma)
	return NULL;
}

#define VM_NO_THP (VM_SPECIAL | VM_HUGETLB | VM_SHARED | VM_MAYSHARE)
#define VM_NO_KHUGEPAGED (VM_SPECIAL | VM_HUGETLB | VM_SHARED | VM_MAYSHARE)

int hugepage_madvise(struct vm_area_struct *vma,
		     unsigned long *vm_flags, int advice)
@@ -1846,11 +1846,6 @@ int hugepage_madvise(struct vm_area_struct *vma,
		if (mm_has_pgste(vma->vm_mm))
			return 0;
#endif
		/*
		 * Be somewhat over-protective like KSM for now!
		 */
		if (*vm_flags & VM_NO_THP)
			return -EINVAL;
		*vm_flags &= ~VM_NOHUGEPAGE;
		*vm_flags |= VM_HUGEPAGE;
		/*
@@ -1858,15 +1853,11 @@ int hugepage_madvise(struct vm_area_struct *vma,
		 * register it here without waiting a page fault that
		 * may not happen any time soon.
		 */
		if (unlikely(khugepaged_enter_vma_merge(vma, *vm_flags)))
		if (!(*vm_flags & VM_NO_KHUGEPAGED) &&
				khugepaged_enter_vma_merge(vma, *vm_flags))
			return -ENOMEM;
		break;
	case MADV_NOHUGEPAGE:
		/*
		 * Be somewhat over-protective like KSM for now!
		 */
		if (*vm_flags & VM_NO_THP)
			return -EINVAL;
		*vm_flags &= ~VM_HUGEPAGE;
		*vm_flags |= VM_NOHUGEPAGE;
		/*
@@ -1974,7 +1965,7 @@ int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
		 * page fault if needed.
		 */
		return 0;
	if (vma->vm_ops || (vm_flags & VM_NO_THP))
	if (vma->vm_ops || (vm_flags & VM_NO_KHUGEPAGED))
		/* khugepaged not yet working on file or special mappings */
		return 0;
	hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
@@ -2366,7 +2357,7 @@ static bool hugepage_vma_check(struct vm_area_struct *vma)
		return false;
	if (is_vma_temporary_stack(vma))
		return false;
	return !(vma->vm_flags & VM_NO_THP);
	return !(vma->vm_flags & VM_NO_KHUGEPAGED);
}

/*
+17 −3
Original line number Diff line number Diff line
@@ -101,6 +101,8 @@ struct shmem_falloc {
/*
 * Lookup/allocation modes passed as the sgp argument of shmem_getpage_gfp().
 *
 * SGP_NOHUGE and SGP_HUGE carry a per-call huge page preference:
 * shmem_getpage_gfp() remembers the requested value and then treats the
 * request as SGP_CACHE for everything else.  shmem_fault() selects them
 * from the VMA's VM_NOHUGEPAGE / VM_HUGEPAGE flags.
 */
enum sgp_type {
	SGP_READ,	/* don't exceed i_size, don't allocate page */
	SGP_CACHE,	/* don't exceed i_size, may allocate page */
	SGP_NOHUGE,	/* like SGP_CACHE, but no huge pages */
	SGP_HUGE,	/* like SGP_CACHE, huge pages preferred */
	SGP_WRITE,	/* may exceed i_size, may allocate !Uptodate page */
	SGP_FALLOC,	/* like SGP_WRITE, but make existing page Uptodate */
};
@@ -1409,6 +1411,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
	struct mem_cgroup *memcg;
	struct page *page;
	swp_entry_t swap;
	enum sgp_type sgp_huge = sgp;
	pgoff_t hindex = index;
	int error;
	int once = 0;
@@ -1416,6 +1419,8 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,

	if (index > (MAX_LFS_FILESIZE >> PAGE_SHIFT))
		return -EFBIG;
	if (sgp == SGP_NOHUGE || sgp == SGP_HUGE)
		sgp = SGP_CACHE;
repeat:
	swap.val = 0;
	page = find_lock_entry(mapping, index);
@@ -1534,7 +1539,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
		/* shmem_symlink() */
		if (mapping->a_ops != &shmem_aops)
			goto alloc_nohuge;
		if (shmem_huge == SHMEM_HUGE_DENY)
		if (shmem_huge == SHMEM_HUGE_DENY || sgp_huge == SGP_NOHUGE)
			goto alloc_nohuge;
		if (shmem_huge == SHMEM_HUGE_FORCE)
			goto alloc_huge;
@@ -1551,7 +1556,9 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
				goto alloc_huge;
			/* fallthrough */
		case SHMEM_HUGE_ADVISE:
			/* TODO: wire up fadvise()/madvise() */
			if (sgp_huge == SGP_HUGE)
				goto alloc_huge;
			/* TODO: implement fadvise() hints */
			goto alloc_nohuge;
		}

@@ -1680,6 +1687,7 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct inode *inode = file_inode(vma->vm_file);
	gfp_t gfp = mapping_gfp_mask(inode->i_mapping);
	enum sgp_type sgp;
	int error;
	int ret = VM_FAULT_LOCKED;

@@ -1741,7 +1749,13 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
		spin_unlock(&inode->i_lock);
	}

	error = shmem_getpage_gfp(inode, vmf->pgoff, &vmf->page, SGP_CACHE,
	sgp = SGP_CACHE;
	if (vma->vm_flags & VM_HUGEPAGE)
		sgp = SGP_HUGE;
	else if (vma->vm_flags & VM_NOHUGEPAGE)
		sgp = SGP_NOHUGE;

	error = shmem_getpage_gfp(inode, vmf->pgoff, &vmf->page, sgp,
				  gfp, vma->vm_mm, &ret);
	if (error)
		return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS);