mm, oom: fix concurrent munlock and oom reaper unmap, v3 (27ae357f) · Commits · e / devices / android_kernel_fairphone_FP4

include/linux/oom.h

+2 −0

Original line number	Diff line number	Diff line
		@@ -95,6 +95,8 @@ static inline int check_stable_address_space(struct mm_struct *mm)
		return 0;
		}

		void __oom_reap_task_mm(struct mm_struct *mm);

		extern unsigned long oom_badness(struct task_struct *p,
		struct mem_cgroup memcg, const nodemask_t nodemask,
		unsigned long totalpages);

mm/mmap.c

+26 −18

Original line number	Diff line number	Diff line
		@@ -3024,6 +3024,32 @@ void exit_mmap(struct mm_struct *mm)
		/* mm's last user has gone, and its about to be pulled down */
		mmu_notifier_release(mm);

		if (unlikely(mm_is_oom_victim(mm))) {
		/*
		* Manually reap the mm to free as much memory as possible.
		* Then, as the oom reaper does, set MMF_OOM_SKIP to disregard
		* this mm from further consideration. Taking mm->mmap_sem for
		* write after setting MMF_OOM_SKIP will guarantee that the oom
		* reaper will not run on this mm again after mmap_sem is
		* dropped.
		*
		* Nothing can be holding mm->mmap_sem here and the above call
		* to mmu_notifier_release(mm) ensures mmu notifier callbacks in
		* __oom_reap_task_mm() will not block.
		*
		* This needs to be done before calling munlock_vma_pages_all(),
		* which clears VM_LOCKED, otherwise the oom reaper cannot
		* reliably test it.
		*/
		mutex_lock(&oom_lock);
		__oom_reap_task_mm(mm);
		mutex_unlock(&oom_lock);

		set_bit(MMF_OOM_SKIP, &mm->flags);
		down_write(&mm->mmap_sem);
		up_write(&mm->mmap_sem);
		}

		if (mm->locked_vm) {
		vma = mm->mmap;
		while (vma) {
		@@ -3045,24 +3071,6 @@ void exit_mmap(struct mm_struct *mm)
		/* update_hiwater_rss(mm) here? but nobody should be looking */
		/* Use -1 here to ensure all VMAs in the mm are unmapped */
		unmap_vmas(&tlb, vma, 0, -1);

		if (unlikely(mm_is_oom_victim(mm))) {
		/*
		* Wait for oom_reap_task() to stop working on this
		* mm. Because MMF_OOM_SKIP is already set before
		* calling down_read(), oom_reap_task() will not run
		* on this "mm" post up_write().
		*
		* mm_is_oom_victim() cannot be set from under us
		* either because victim->mm is already set to NULL
		* under task_lock before calling mmput and oom_mm is
		* set not NULL by the OOM killer only if victim->mm
		* is found not NULL while holding the task_lock.
		*/
		set_bit(MMF_OOM_SKIP, &mm->flags);
		down_write(&mm->mmap_sem);
		up_write(&mm->mmap_sem);
		}
		free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING);
		tlb_finish_mmu(&tlb, 0, -1);

mm/oom_kill.c

+43 −38

Original line number	Diff line number	Diff line
		@@ -469,7 +469,6 @@ bool process_shares_mm(struct task_struct p, struct mm_struct mm)
		return false;
		}


		#ifdef CONFIG_MMU
		/*
		* OOM Reaper kernel thread which tries to reap the memory used by the OOM
		@@ -480,16 +479,54 @@ static DECLARE_WAIT_QUEUE_HEAD(oom_reaper_wait);
		static struct task_struct *oom_reaper_list;
		static DEFINE_SPINLOCK(oom_reaper_lock);

		static bool __oom_reap_task_mm(struct task_struct tsk, struct mm_struct mm)
		void __oom_reap_task_mm(struct mm_struct *mm)
		{
		struct mmu_gather tlb;
		struct vm_area_struct *vma;

		/*
		* Tell all users of get_user/copy_from_user etc... that the content
		* is no longer stable. No barriers really needed because unmapping
		* should imply barriers already and the reader would hit a page fault
		* if it stumbled over a reaped memory.
		*/
		set_bit(MMF_UNSTABLE, &mm->flags);

		for (vma = mm->mmap ; vma; vma = vma->vm_next) {
		if (!can_madv_dontneed_vma(vma))
		continue;

		/*
		* Only anonymous pages have a good chance to be dropped
		* without additional steps which we cannot afford as we
		* are OOM already.
		*
		* We do not even care about fs backed pages because all
		* which are reclaimable have already been reclaimed and
		* we do not want to block exit_mmap by keeping mm ref
		* count elevated without a good reason.
		*/
		if (vma_is_anonymous(vma) \|\| !(vma->vm_flags & VM_SHARED)) {
		const unsigned long start = vma->vm_start;
		const unsigned long end = vma->vm_end;
		struct mmu_gather tlb;

		tlb_gather_mmu(&tlb, mm, start, end);
		mmu_notifier_invalidate_range_start(mm, start, end);
		unmap_page_range(&tlb, vma, start, end, NULL);
		mmu_notifier_invalidate_range_end(mm, start, end);
		tlb_finish_mmu(&tlb, start, end);
		}
		}
		}

		static bool oom_reap_task_mm(struct task_struct tsk, struct mm_struct mm)
		{
		bool ret = true;

		/*
		* We have to make sure to not race with the victim exit path
		* and cause premature new oom victim selection:
		* __oom_reap_task_mm exit_mm
		* oom_reap_task_mm exit_mm
		* mmget_not_zero
		* mmput
		* atomic_dec_and_test
		@@ -534,39 +571,8 @@ static bool __oom_reap_task_mm(struct task_struct tsk, struct mm_struct mm)

		trace_start_task_reaping(tsk->pid);

		/*
		* Tell all users of get_user/copy_from_user etc... that the content
		* is no longer stable. No barriers really needed because unmapping
		* should imply barriers already and the reader would hit a page fault
		* if it stumbled over a reaped memory.
		*/
		set_bit(MMF_UNSTABLE, &mm->flags);

		for (vma = mm->mmap ; vma; vma = vma->vm_next) {
		if (!can_madv_dontneed_vma(vma))
		continue;

		/*
		* Only anonymous pages have a good chance to be dropped
		* without additional steps which we cannot afford as we
		* are OOM already.
		*
		* We do not even care about fs backed pages because all
		* which are reclaimable have already been reclaimed and
		* we do not want to block exit_mmap by keeping mm ref
		* count elevated without a good reason.
		*/
		if (vma_is_anonymous(vma) \|\| !(vma->vm_flags & VM_SHARED)) {
		const unsigned long start = vma->vm_start;
		const unsigned long end = vma->vm_end;
		__oom_reap_task_mm(mm);

		tlb_gather_mmu(&tlb, mm, start, end);
		mmu_notifier_invalidate_range_start(mm, start, end);
		unmap_page_range(&tlb, vma, start, end, NULL);
		mmu_notifier_invalidate_range_end(mm, start, end);
		tlb_finish_mmu(&tlb, start, end);
		}
		}
		pr_info("oom_reaper: reaped process %d (%s), now anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n",
		task_pid_nr(tsk), tsk->comm,
		K(get_mm_counter(mm, MM_ANONPAGES)),
		@@ -587,14 +593,13 @@ static void oom_reap_task(struct task_struct *tsk)
		struct mm_struct *mm = tsk->signal->oom_mm;

		/* Retry the down_read_trylock(mmap_sem) a few times */
		while (attempts++ < MAX_OOM_REAP_RETRIES && !__oom_reap_task_mm(tsk, mm))
		while (attempts++ < MAX_OOM_REAP_RETRIES && !oom_reap_task_mm(tsk, mm))
		schedule_timeout_idle(HZ/10);

		if (attempts <= MAX_OOM_REAP_RETRIES \|\|
		test_bit(MMF_OOM_SKIP, &mm->flags))
		goto done;


		pr_info("oom_reaper: unable to reap pid:%d (%s)\n",
		task_pid_nr(tsk), tsk->comm);
		debug_show_all_locks();