
Commit bebeb3d6 authored by Michel Lespinasse, committed by Linus Torvalds

mm: introduce mm_populate() for populating new vmas



When creating new mappings using the MAP_POPULATE / MAP_LOCKED flags (or
with MCL_FUTURE in effect), we want to populate the pages within the
newly created vmas.  This may take a while as we may have to read pages
from disk, so ideally we want to do this outside of the write-locked
mmap_sem region.

This change introduces mm_populate(), which is used to defer populating
such mappings until after the mmap_sem write lock has been released.
This is implemented as a generalization of the former do_mlock_pages(),
which accomplished the same task but was only used during mlock() /
mlockall().
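
In other words, callers now receive a populate hint from do_mmap_pgoff()
and carry out the population themselves after dropping the lock. A minimal
sketch of the new calling convention, distilled from the fs/aio.c and
ipc/shm.c hunks below (names abbreviated, not verbatim kernel code):

	bool populate;

	down_write(&mm->mmap_sem);
	addr = do_mmap_pgoff(file, addr, len, prot, flags, pgoff, &populate);
	up_write(&mm->mmap_sem);

	/* do_mmap_pgoff() only sets populate on success, so no error
	 * check is needed here; mm_populate() itself ignores errors. */
	if (populate)
		mm_populate(addr, len);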

Signed-off-by: Michel Lespinasse <walken@google.com>
Reported-by: Andy Lutomirski <luto@amacapital.net>
Acked-by: Rik van Riel <riel@redhat.com>
Tested-by: Andy Lutomirski <luto@amacapital.net>
Cc: Greg Ungerer <gregungerer@westnet.com.au>
Cc: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 940e7da5
fs/aio.c +5 −1
@@ -103,6 +103,7 @@ static int aio_setup_ring(struct kioctx *ctx)
 	unsigned nr_events = ctx->max_reqs;
 	unsigned long size;
 	int nr_pages;
+	bool populate;
 
 	/* Compensate for the ring buffer's head/tail overlap entry */
 	nr_events += 2;	/* 1 is required, 2 for good luck */
@@ -129,7 +130,8 @@ static int aio_setup_ring(struct kioctx *ctx)
 	down_write(&ctx->mm->mmap_sem);
 	info->mmap_base = do_mmap_pgoff(NULL, 0, info->mmap_size,
 					PROT_READ|PROT_WRITE,
-					MAP_ANONYMOUS|MAP_PRIVATE, 0);
+					MAP_ANONYMOUS|MAP_PRIVATE, 0,
+					&populate);
 	if (IS_ERR((void *)info->mmap_base)) {
 		up_write(&ctx->mm->mmap_sem);
 		info->mmap_size = 0;
@@ -147,6 +149,8 @@ static int aio_setup_ring(struct kioctx *ctx)
 		aio_free_ring(ctx);
 		return -EAGAIN;
 	}
+	if (populate)
+		mm_populate(info->mmap_base, info->mmap_size);
 
 	ctx->user_id = info->mmap_base;
include/linux/mm.h +15 −3
@@ -1474,11 +1474,23 @@ extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned lo
 extern unsigned long mmap_region(struct file *file, unsigned long addr,
 	unsigned long len, unsigned long flags,
 	vm_flags_t vm_flags, unsigned long pgoff);
-extern unsigned long do_mmap_pgoff(struct file *, unsigned long,
-        unsigned long, unsigned long,
-        unsigned long, unsigned long);
+extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
+	unsigned long len, unsigned long prot, unsigned long flags,
+	unsigned long pgoff, bool *populate);
 extern int do_munmap(struct mm_struct *, unsigned long, size_t);
 
+#ifdef CONFIG_MMU
+extern int __mm_populate(unsigned long addr, unsigned long len,
+			 int ignore_errors);
+static inline void mm_populate(unsigned long addr, unsigned long len)
+{
+	/* Ignore errors */
+	(void) __mm_populate(addr, len, 1);
+}
+#else
+static inline void mm_populate(unsigned long addr, unsigned long len) {}
+#endif
+
 /* These take the mm semaphore themselves */
 extern unsigned long vm_brk(unsigned long, unsigned long);
 extern int vm_munmap(unsigned long, size_t);
ipc/shm.c +7 −5
@@ -967,11 +967,11 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
 	unsigned long flags;
 	unsigned long prot;
 	int acc_mode;
-	unsigned long user_addr;
 	struct ipc_namespace *ns;
 	struct shm_file_data *sfd;
 	struct path path;
 	fmode_t f_mode;
+	bool populate = false;
 
 	err = -EINVAL;
 	if (shmid < 0)
@@ -1070,13 +1070,15 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
 			goto invalid;
 	}
 
-	user_addr = do_mmap_pgoff(file, addr, size, prot, flags, 0);
-	*raddr = user_addr;
+	addr = do_mmap_pgoff(file, addr, size, prot, flags, 0, &populate);
+	*raddr = addr;
 	err = 0;
-	if (IS_ERR_VALUE(user_addr))
-		err = (long)user_addr;
+	if (IS_ERR_VALUE(addr))
+		err = (long)addr;
 invalid:
 	up_write(&current->mm->mmap_sem);
+	if (populate)
+		mm_populate(addr, size);
 
 out_fput:
 	fput(file);
mm/mlock.c +11 −6
@@ -416,7 +416,14 @@ static int do_mlock(unsigned long start, size_t len, int on)
 	return error;
 }
 
-static int do_mlock_pages(unsigned long start, size_t len, int ignore_errors)
+/*
+ * __mm_populate - populate and/or mlock pages within a range of address space.
+ *
+ * This is used to implement mlock() and the MAP_POPULATE / MAP_LOCKED mmap
+ * flags. VMAs must be already marked with the desired vm_flags, and
+ * mmap_sem must not be held.
+ */
+int __mm_populate(unsigned long start, unsigned long len, int ignore_errors)
 {
 	struct mm_struct *mm = current->mm;
 	unsigned long end, nstart, nend;
@@ -498,7 +505,7 @@ SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len)
 		error = do_mlock(start, len, 1);
 	up_write(&current->mm->mmap_sem);
 	if (!error)
-		error = do_mlock_pages(start, len, 0);
+		error = __mm_populate(start, len, 0);
 	return error;
 }
 
@@ -564,10 +571,8 @@ SYSCALL_DEFINE1(mlockall, int, flags)
 	    capable(CAP_IPC_LOCK))
 		ret = do_mlockall(flags);
 	up_write(&current->mm->mmap_sem);
-	if (!ret && (flags & MCL_CURRENT)) {
-		/* Ignore errors */
-		do_mlock_pages(0, TASK_SIZE, 1);
-	}
+	if (!ret && (flags & MCL_CURRENT))
+		mm_populate(0, TASK_SIZE);
 out:
 	return ret;
 }
mm/mmap.c +15 −5
@@ -1154,12 +1154,15 @@ static inline unsigned long round_hint_to_min(unsigned long hint)
 
 unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 			unsigned long len, unsigned long prot,
-			unsigned long flags, unsigned long pgoff)
+			unsigned long flags, unsigned long pgoff,
+			bool *populate)
 {
 	struct mm_struct * mm = current->mm;
 	struct inode *inode;
 	vm_flags_t vm_flags;
 
+	*populate = false;
+
 	/*
 	 * Does the application expect PROT_READ to imply PROT_EXEC?
 	 *
@@ -1280,7 +1283,12 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 		}
 	}
 
-	return mmap_region(file, addr, len, flags, vm_flags, pgoff);
+	addr = mmap_region(file, addr, len, flags, vm_flags, pgoff);
+	if (!IS_ERR_VALUE(addr) &&
+	    ((vm_flags & VM_LOCKED) ||
+	     (flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE))
+		*populate = true;
+	return addr;
 }
 
 SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
@@ -1531,10 +1539,12 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 
 	vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
 	if (vm_flags & VM_LOCKED) {
-		if (!mlock_vma_pages_range(vma, addr, addr + len))
+		if (!((vm_flags & VM_SPECIAL) || is_vm_hugetlb_page(vma) ||
+					vma == get_gate_vma(current->mm)))
 			mm->locked_vm += (len >> PAGE_SHIFT);
-	} else if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK))
-		make_pages_present(addr, addr + len);
+		else
+			vma->vm_flags &= ~VM_LOCKED;
+	}
 
 	if (file)
 		uprobe_mmap(vma);
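
For reference, the userspace-visible semantics are unchanged: mmap() with
MAP_POPULATE still prefaults the mapping before returning; this patch only
moves that work outside the write-locked mmap_sem region. A small
illustrative userspace test, not part of this patch:

	#include <sys/mman.h>
	#include <stdio.h>

	int main(void)
	{
		size_t len = 16 * 4096;
		/* Ask the kernel to prefault all pages up front. */
		void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
			       MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE,
			       -1, 0);
		if (p == MAP_FAILED) {
			perror("mmap");
			return 1;
		}
		/* Pages should already be resident at this point. */
		munmap(p, len);
		return 0;
	}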