Loading include/linux/mm.h +2 −0 Original line number Diff line number Diff line Loading @@ -2350,6 +2350,8 @@ extern int __do_munmap(struct mm_struct *, unsigned long, size_t, struct list_head *uf, bool downgrade); extern int do_munmap(struct mm_struct *, unsigned long, size_t, struct list_head *uf); extern int do_madvise(struct task_struct *target_task, struct mm_struct *mm, unsigned long start, size_t len_in, int behavior); static inline unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, Loading mm/madvise.c +43 −14 Original line number Diff line number Diff line Loading @@ -22,11 +22,14 @@ #include <linux/file.h> #include <linux/blkdev.h> #include <linux/backing-dev.h> #include <linux/compat.h> #include <linux/pagewalk.h> #include <linux/swap.h> #include <linux/swapops.h> #include <linux/shmem_fs.h> #include <linux/mmu_notifier.h> #include <linux/sched/mm.h> #include <linux/uio.h> #include <asm/tlb.h> Loading Loading @@ -254,6 +257,7 @@ static long madvise_willneed(struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start, unsigned long end) { struct mm_struct *mm = vma->vm_mm; struct file *file = vma->vm_file; loff_t offset; Loading Loading @@ -288,12 +292,12 @@ static long madvise_willneed(struct vm_area_struct *vma, */ *prev = NULL; /* tell sys_madvise we drop mmap_sem */ get_file(file); up_read(&current->mm->mmap_sem); up_read(&mm->mmap_sem); offset = (loff_t)(start - vma->vm_start) + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); vfs_fadvise(file, offset, end - start, POSIX_FADV_WILLNEED); fput(file); down_read(&current->mm->mmap_sem); down_read(&mm->mmap_sem); return 0; } Loading Loading @@ -682,7 +686,6 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr, if (nr_swap) { if (current->mm == mm) sync_mm_rss(mm); add_mm_counter(mm, MM_SWAPENTS, nr_swap); } arch_leave_lazy_mmu_mode(); Loading Loading @@ -762,6 +765,8 @@ static long madvise_dontneed_free(struct vm_area_struct *vma, unsigned long start, unsigned long end, int 
behavior) { struct mm_struct *mm = vma->vm_mm; *prev = vma; if (!can_madv_lru_vma(vma)) return -EINVAL; Loading @@ -769,8 +774,8 @@ static long madvise_dontneed_free(struct vm_area_struct *vma, if (!userfaultfd_remove(vma, start, end)) { *prev = NULL; /* mmap_sem has been dropped, prev is stale */ down_read(&current->mm->mmap_sem); vma = find_vma(current->mm, start); down_read(&mm->mmap_sem); vma = find_vma(mm, start); if (!vma) return -ENOMEM; if (start < vma->vm_start) { Loading Loading @@ -824,6 +829,7 @@ static long madvise_remove(struct vm_area_struct *vma, loff_t offset; int error; struct file *f; struct mm_struct *mm = vma->vm_mm; *prev = NULL; /* tell sys_madvise we drop mmap_sem */ Loading Loading @@ -851,13 +857,13 @@ static long madvise_remove(struct vm_area_struct *vma, get_file(f); if (userfaultfd_remove(vma, start, end)) { /* mmap_sem was not released by userfaultfd_remove() */ up_read(&current->mm->mmap_sem); up_read(&mm->mmap_sem); } error = vfs_fallocate(f, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, offset, end - start); fput(f); down_read(&current->mm->mmap_sem); down_read(&mm->mmap_sem); return error; } Loading Loading @@ -1050,7 +1056,8 @@ madvise_behavior_valid(int behavior) * -EBADF - map exists, but area maps something that isn't a file. * -EAGAIN - a kernel resource was temporarily unavailable. */ SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior) int do_madvise(struct task_struct *target_task, struct mm_struct *mm, unsigned long start, size_t len_in, int behavior) { unsigned long end, tmp; struct vm_area_struct *vma, *prev; Loading Loading @@ -1088,10 +1095,27 @@ SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior) write = madvise_need_mmap_write(behavior); if (write) { if (down_write_killable(&current->mm->mmap_sem)) if (down_write_killable(&mm->mmap_sem)) return -EINTR; /* * We may have stolen the mm from another process * that is undergoing core dumping. 
* * Right now that's io_ring, in the future it may * be remote process management and not "current" * at all. * * We need to fix core dumping to not do this, * but for now we have the mmget_still_valid() * model. */ if (!mmget_still_valid(mm)) { up_write(&mm->mmap_sem); return -EINTR; } } else { down_read(&current->mm->mmap_sem); down_read(&mm->mmap_sem); } /* Loading @@ -1099,7 +1123,7 @@ SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior) * ranges, just ignore them, but return -ENOMEM at the end. * - different from the way of handling in mlock etc. */ vma = find_vma_prev(current->mm, start, &prev); vma = find_vma_prev(mm, start, &prev); if (vma && start > vma->vm_start) prev = vma; Loading Loading @@ -1136,14 +1160,19 @@ SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior) if (prev) vma = prev->vm_next; else /* madvise_remove dropped mmap_sem */ vma = find_vma(current->mm, start); vma = find_vma(mm, start); } out: blk_finish_plug(&plug); if (write) up_write(&current->mm->mmap_sem); up_write(&mm->mmap_sem); else up_read(&current->mm->mmap_sem); up_read(&mm->mmap_sem); return error; } SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior) { return do_madvise(current, current->mm, start, len_in, behavior); } Loading
include/linux/mm.h +2 −0 Original line number Diff line number Diff line Loading @@ -2350,6 +2350,8 @@ extern int __do_munmap(struct mm_struct *, unsigned long, size_t, struct list_head *uf, bool downgrade); extern int do_munmap(struct mm_struct *, unsigned long, size_t, struct list_head *uf); extern int do_madvise(struct task_struct *target_task, struct mm_struct *mm, unsigned long start, size_t len_in, int behavior); static inline unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, Loading
mm/madvise.c +43 −14 Original line number Diff line number Diff line Loading @@ -22,11 +22,14 @@ #include <linux/file.h> #include <linux/blkdev.h> #include <linux/backing-dev.h> #include <linux/compat.h> #include <linux/pagewalk.h> #include <linux/swap.h> #include <linux/swapops.h> #include <linux/shmem_fs.h> #include <linux/mmu_notifier.h> #include <linux/sched/mm.h> #include <linux/uio.h> #include <asm/tlb.h> Loading Loading @@ -254,6 +257,7 @@ static long madvise_willneed(struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start, unsigned long end) { struct mm_struct *mm = vma->vm_mm; struct file *file = vma->vm_file; loff_t offset; Loading Loading @@ -288,12 +292,12 @@ static long madvise_willneed(struct vm_area_struct *vma, */ *prev = NULL; /* tell sys_madvise we drop mmap_sem */ get_file(file); up_read(&current->mm->mmap_sem); up_read(&mm->mmap_sem); offset = (loff_t)(start - vma->vm_start) + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); vfs_fadvise(file, offset, end - start, POSIX_FADV_WILLNEED); fput(file); down_read(&current->mm->mmap_sem); down_read(&mm->mmap_sem); return 0; } Loading Loading @@ -682,7 +686,6 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr, if (nr_swap) { if (current->mm == mm) sync_mm_rss(mm); add_mm_counter(mm, MM_SWAPENTS, nr_swap); } arch_leave_lazy_mmu_mode(); Loading Loading @@ -762,6 +765,8 @@ static long madvise_dontneed_free(struct vm_area_struct *vma, unsigned long start, unsigned long end, int behavior) { struct mm_struct *mm = vma->vm_mm; *prev = vma; if (!can_madv_lru_vma(vma)) return -EINVAL; Loading @@ -769,8 +774,8 @@ static long madvise_dontneed_free(struct vm_area_struct *vma, if (!userfaultfd_remove(vma, start, end)) { *prev = NULL; /* mmap_sem has been dropped, prev is stale */ down_read(&current->mm->mmap_sem); vma = find_vma(current->mm, start); down_read(&mm->mmap_sem); vma = find_vma(mm, start); if (!vma) return -ENOMEM; if (start < vma->vm_start) { Loading Loading @@ -824,6 +829,7 @@ static long 
madvise_remove(struct vm_area_struct *vma, loff_t offset; int error; struct file *f; struct mm_struct *mm = vma->vm_mm; *prev = NULL; /* tell sys_madvise we drop mmap_sem */ Loading Loading @@ -851,13 +857,13 @@ static long madvise_remove(struct vm_area_struct *vma, get_file(f); if (userfaultfd_remove(vma, start, end)) { /* mmap_sem was not released by userfaultfd_remove() */ up_read(&current->mm->mmap_sem); up_read(&mm->mmap_sem); } error = vfs_fallocate(f, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, offset, end - start); fput(f); down_read(&current->mm->mmap_sem); down_read(&mm->mmap_sem); return error; } Loading Loading @@ -1050,7 +1056,8 @@ madvise_behavior_valid(int behavior) * -EBADF - map exists, but area maps something that isn't a file. * -EAGAIN - a kernel resource was temporarily unavailable. */ SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior) int do_madvise(struct task_struct *target_task, struct mm_struct *mm, unsigned long start, size_t len_in, int behavior) { unsigned long end, tmp; struct vm_area_struct *vma, *prev; Loading Loading @@ -1088,10 +1095,27 @@ SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior) write = madvise_need_mmap_write(behavior); if (write) { if (down_write_killable(&current->mm->mmap_sem)) if (down_write_killable(&mm->mmap_sem)) return -EINTR; /* * We may have stolen the mm from another process * that is undergoing core dumping. * * Right now that's io_ring, in the future it may * be remote process management and not "current" * at all. * * We need to fix core dumping to not do this, * but for now we have the mmget_still_valid() * model. */ if (!mmget_still_valid(mm)) { up_write(&mm->mmap_sem); return -EINTR; } } else { down_read(&current->mm->mmap_sem); down_read(&mm->mmap_sem); } /* Loading @@ -1099,7 +1123,7 @@ SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior) * ranges, just ignore them, but return -ENOMEM at the end. 
* - different from the way of handling in mlock etc. */ vma = find_vma_prev(current->mm, start, &prev); vma = find_vma_prev(mm, start, &prev); if (vma && start > vma->vm_start) prev = vma; Loading Loading @@ -1136,14 +1160,19 @@ SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior) if (prev) vma = prev->vm_next; else /* madvise_remove dropped mmap_sem */ vma = find_vma(current->mm, start); vma = find_vma(mm, start); } out: blk_finish_plug(&plug); if (write) up_write(&current->mm->mmap_sem); up_write(&mm->mmap_sem); else up_read(&current->mm->mmap_sem); up_read(&mm->mmap_sem); return error; } SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior) { return do_madvise(current, current->mm, start, len_in, behavior); }