Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 53a7706d authored by Michel Lespinasse's avatar Michel Lespinasse Committed by Linus Torvalds
Browse files

mlock: do not hold mmap_sem for extended periods of time



__get_user_pages gets a new 'nonblocking' parameter to signal that the
caller is prepared to re-acquire mmap_sem and retry the operation if
needed.  This is used to split off long operations if they are going to
block on a disk transfer, or when we detect contention on the mmap_sem.

[akpm@linux-foundation.org: remove ref to rwsem_is_contended()]
Signed-off-by: default avatarMichel Lespinasse <walken@google.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: David Howells <dhowells@redhat.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent 5fdb2002
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -243,7 +243,8 @@ static inline void mminit_validate_memmodel_limits(unsigned long *start_pfn,

int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
		     unsigned long start, int len, unsigned int foll_flags,
		     struct page **pages, struct vm_area_struct **vmas);
		     struct page **pages, struct vm_area_struct **vmas,
		     int *nonblocking);

#define ZONE_RECLAIM_NOSCAN	-2
#define ZONE_RECLAIM_FULL	-1
+18 −5
Original line number Diff line number Diff line
@@ -1363,7 +1363,8 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,

int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
		     unsigned long start, int nr_pages, unsigned int gup_flags,
		     struct page **pages, struct vm_area_struct **vmas)
		     struct page **pages, struct vm_area_struct **vmas,
		     int *nonblocking)
{
	int i;
	unsigned long vm_flags;
@@ -1463,10 +1464,15 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
			cond_resched();
			while (!(page = follow_page(vma, start, foll_flags))) {
				int ret;
				unsigned int fault_flags = 0;

				if (foll_flags & FOLL_WRITE)
					fault_flags |= FAULT_FLAG_WRITE;
				if (nonblocking)
					fault_flags |= FAULT_FLAG_ALLOW_RETRY;

				ret = handle_mm_fault(mm, vma, start,
					(foll_flags & FOLL_WRITE) ?
					FAULT_FLAG_WRITE : 0);
							fault_flags);

				if (ret & VM_FAULT_ERROR) {
					if (ret & VM_FAULT_OOM)
@@ -1482,6 +1488,11 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
				else
					tsk->min_flt++;

				if (ret & VM_FAULT_RETRY) {
					*nonblocking = 0;
					return i;
				}

				/*
				 * The VM_FAULT_WRITE bit tells us that
				 * do_wp_page has broken COW when necessary,
@@ -1581,7 +1592,8 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
	if (force)
		flags |= FOLL_FORCE;

	return __get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas);
	return __get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas,
				NULL);
}
EXPORT_SYMBOL(get_user_pages);

@@ -1606,7 +1618,8 @@ struct page *get_dump_page(unsigned long addr)
	struct page *page;

	if (__get_user_pages(current, current->mm, addr, 1,
			FOLL_FORCE | FOLL_DUMP | FOLL_GET, &page, &vma) < 1)
			     FOLL_FORCE | FOLL_DUMP | FOLL_GET, &page, &vma,
			     NULL) < 1)
		return NULL;
	flush_cache_page(vma, addr, page_to_pfn(page));
	return page;
+23 −17
Original line number Diff line number Diff line
@@ -155,13 +155,13 @@ static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long add
 * vma->vm_mm->mmap_sem must be held for at least read.
 */
static long __mlock_vma_pages_range(struct vm_area_struct *vma,
				    unsigned long start, unsigned long end)
				    unsigned long start, unsigned long end,
				    int *nonblocking)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long addr = start;
	int nr_pages = (end - start) / PAGE_SIZE;
	int gup_flags;
	int ret;

	VM_BUG_ON(start & ~PAGE_MASK);
	VM_BUG_ON(end   & ~PAGE_MASK);
@@ -187,9 +187,8 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma,
		nr_pages--;
	}

	ret = __get_user_pages(current, mm, addr, nr_pages, gup_flags,
			       NULL, NULL);
	return max(ret, 0);	/* 0 or negative error code */
	return __get_user_pages(current, mm, addr, nr_pages, gup_flags,
				NULL, NULL, nonblocking);
}

/*
@@ -233,7 +232,7 @@ long mlock_vma_pages_range(struct vm_area_struct *vma,
			is_vm_hugetlb_page(vma) ||
			vma == get_gate_vma(current))) {

		__mlock_vma_pages_range(vma, start, end);
		__mlock_vma_pages_range(vma, start, end, NULL);

		/* Hide errors from mmap() and other callers */
		return 0;
@@ -429,21 +428,23 @@ static int do_mlock_pages(unsigned long start, size_t len, int ignore_errors)
	struct mm_struct *mm = current->mm;
	unsigned long end, nstart, nend;
	struct vm_area_struct *vma = NULL;
	int locked = 0;
	int ret = 0;

	VM_BUG_ON(start & ~PAGE_MASK);
	VM_BUG_ON(len != PAGE_ALIGN(len));
	end = start + len;

	down_read(&mm->mmap_sem);
	for (nstart = start; nstart < end; nstart = nend) {
		/*
		 * We want to fault in pages for [nstart; end) address range.
		 * Find first corresponding VMA.
		 */
		if (!vma)
		if (!locked) {
			locked = 1;
			down_read(&mm->mmap_sem);
			vma = find_vma(mm, nstart);
		else
		} else if (nstart >= vma->vm_end)
			vma = vma->vm_next;
		if (!vma || vma->vm_start >= end)
			break;
@@ -457,18 +458,23 @@ static int do_mlock_pages(unsigned long start, size_t len, int ignore_errors)
		if (nstart < vma->vm_start)
			nstart = vma->vm_start;
		/*
		 * Now fault in a range of pages within the first VMA.
		 * Now fault in a range of pages. __mlock_vma_pages_range()
		 * double checks the vma flags, so that it won't mlock pages
		 * if the vma was already munlocked.
		 */
		ret = __mlock_vma_pages_range(vma, nstart, nend);
		if (ret < 0 && ignore_errors) {
		ret = __mlock_vma_pages_range(vma, nstart, nend, &locked);
		if (ret < 0) {
			if (ignore_errors) {
				ret = 0;
				continue;	/* continue at next VMA */
			}
		if (ret) {
			ret = __mlock_posix_error_return(ret);
			break;
		}
		nend = nstart + ret * PAGE_SIZE;
		ret = 0;
	}
	if (locked)
		up_read(&mm->mmap_sem);
	return ret;	/* 0 or negative error code */
}
+4 −2
Original line number Diff line number Diff line
@@ -127,7 +127,8 @@ unsigned int kobjsize(const void *objp)

int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
		     unsigned long start, int nr_pages, unsigned int foll_flags,
		     struct page **pages, struct vm_area_struct **vmas)
		     struct page **pages, struct vm_area_struct **vmas,
		     int *retry)
{
	struct vm_area_struct *vma;
	unsigned long vm_flags;
@@ -185,7 +186,8 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
	if (force)
		flags |= FOLL_FORCE;

	return __get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas);
	return __get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas,
				NULL);
}
EXPORT_SYMBOL(get_user_pages);