
Commit 01cc2e58 authored by Vlastimil Babka, committed by Linus Torvalds

mm: munlock: fix potential race with THP page split



Since commit ff6a6da6 ("mm: accelerate munlock() treatment of THP
pages"), munlock skips tail pages of a munlocked THP page.  The code
makes some attempt to guard against a racing THP page split, but
inspection shows two problems that may lead to a non-fatal, yet wrong
outcome.

First, __split_huge_page_refcount() copies flags, including PageMlocked,
from the head page to the tail pages.  Clearing PageMlocked via
munlock_vma_page() in the middle of this operation might therefore leave
some tail pages with the PageMlocked flag set.  As the head page still
appears to be a THP page until all tail pages are processed,
munlock_vma_page() might think it munlocked the whole THP page and skip
all the former tail pages.  Before ff6a6da6, those pages would be
cleared in further iterations of munlock_vma_pages_range(), but NR_MLOCK
would still become undercounted (related to the next point).
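
To make the interleaving concrete, here is a minimal userspace model of
the flag-copy race.  It is an illustrative sketch only, not kernel code:
the threads, the flag array, and all helper names are invented stand-ins;
only the 512-page THP geometry and the head-to-tail flag copy come from
the description above.  Depending on scheduling, a run strands anywhere
from none to all of the tail flags; any nonzero result is the bug.
Build with "cc -pthread split_race.c".

/* split_race.c: hypothetical userspace model of the PageMlocked
 * copy-vs-clear race; nothing here is actual kernel code. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define NR_TAILS 511			/* tail pages of one 2MB THP */

static atomic_int mlocked[NR_TAILS + 1];	/* [0] is the head page */

/* Plays __split_huge_page_refcount(): copies the head page's flag
 * to each tail page in turn. */
static void *split_thread(void *arg)
{
	(void)arg;
	for (int i = 1; i <= NR_TAILS; i++)
		atomic_store(&mlocked[i], atomic_load(&mlocked[0]));
	return NULL;
}

/* Plays munlock_vma_page(): clears the head's flag mid-copy, as if
 * TestClearPageMlocked() ran without holding lru_lock. */
static void *munlock_thread(void *arg)
{
	(void)arg;
	atomic_store(&mlocked[0], 0);
	return NULL;
}

int main(void)
{
	pthread_t split, munlock;
	int stranded = 0;

	atomic_store(&mlocked[0], 1);	/* head page starts Mlocked */
	pthread_create(&split, NULL, split_thread, NULL);
	pthread_create(&munlock, NULL, munlock_thread, NULL);
	pthread_join(split, NULL);
	pthread_join(munlock, NULL);

	/* Tails copied before the clear keep the flag; the head no
	 * longer reports Mlocked, so nothing comes back for them. */
	for (int i = 1; i <= NR_TAILS; i++)
		stranded += atomic_load(&mlocked[i]);
	printf("tail pages left Mlocked: %d of %d\n", stranded, NR_TAILS);
	return 0;
}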

Second, NR_MLOCK accounting is based on a call to hpage_nr_pages() after
PageMlocked is cleared.  The accounting might also become inconsistent
due to a race with __split_huge_page_refcount(), in two ways (a worked
example of the overcount follows the list):

- undercount when HPAGE_PMD_NR is subtracted, but some tail pages are
  left with PageMlocked set and counted again (only possible before
  ff6a6da6)

- overcount when hpage_nr_pages() sees a normal page (split has already
  finished), but the parallel split has meanwhile cleared PageMlocked from
  additional tail pages
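
For a concrete instance of the overcount case (assuming x86_64 with 4 KB
base pages, so HPAGE_PMD_NR == 512): an mlocked THP contributes 512 to
NR_MLOCK.  If munlock_vma_page() clears PageMlocked on the head just as
the split is copying flags, the tail pages inherit the cleared flag; the
split then completes, hpage_nr_pages() returns 1, and only 1 is
subtracted.  Nothing ever subtracts the remaining 511, so NR_MLOCK stays
overcounted by 511 pages.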

This patch prevents both problems by extending the scope of lru_lock in
munlock_vma_page().  This is convenient because:

- __split_huge_page_refcount() takes lru_lock for its whole operation

- munlock_vma_page() typically takes lru_lock anyway for page isolation

As this becomes the second function where page isolation is done with
lru_lock already held, factor the isolation out into a new
__munlock_isolate_lru_page() helper and clean up the surrounding code.

[akpm@linux-foundation.org: avoid a coding-style ugliness]
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Sasha Levin <sasha.levin@oracle.com>
Cc: Michel Lespinasse <walken@google.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Hugh Dickins <hughd@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent f0b791a3
+60 −44

--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -90,6 +90,26 @@ void mlock_vma_page(struct page *page)
 	}
 }
 
+/*
+ * Isolate a page from LRU with optional get_page() pin.
+ * Assumes lru_lock already held and page already pinned.
+ */
+static bool __munlock_isolate_lru_page(struct page *page, bool getpage)
+{
+	if (PageLRU(page)) {
+		struct lruvec *lruvec;
+
+		lruvec = mem_cgroup_page_lruvec(page, page_zone(page));
+		if (getpage)
+			get_page(page);
+		ClearPageLRU(page);
+		del_page_from_lru_list(page, lruvec, page_lru(page));
+		return true;
+	}
+
+	return false;
+}
+
 /*
  * Finish munlock after successful page isolation
  *
@@ -126,9 +146,9 @@ static void __munlock_isolated_page(struct page *page)
 static void __munlock_isolation_failed(struct page *page)
 {
 	if (PageUnevictable(page))
-		count_vm_event(UNEVICTABLE_PGSTRANDED);
+		__count_vm_event(UNEVICTABLE_PGSTRANDED);
 	else
-		count_vm_event(UNEVICTABLE_PGMUNLOCKED);
+		__count_vm_event(UNEVICTABLE_PGMUNLOCKED);
 }
 
 /**
@@ -152,28 +172,34 @@ static void __munlock_isolation_failed(struct page *page)
 unsigned int munlock_vma_page(struct page *page)
 {
 	unsigned int nr_pages;
+	struct zone *zone = page_zone(page);
 
 	BUG_ON(!PageLocked(page));
 
-	if (TestClearPageMlocked(page)) {
-		nr_pages = hpage_nr_pages(page);
-		mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
-		if (!isolate_lru_page(page))
-			__munlock_isolated_page(page);
-		else
-			__munlock_isolation_failed(page);
-	} else {
-		nr_pages = hpage_nr_pages(page);
-	}
-
 	/*
-	 * Regardless of the original PageMlocked flag, we determine nr_pages
-	 * after touching the flag. This leaves a possible race with a THP page
-	 * split, such that a whole THP page was munlocked, but nr_pages == 1.
-	 * Returning a smaller mask due to that is OK, the worst that can
-	 * happen is subsequent useless scanning of the former tail pages.
-	 * The NR_MLOCK accounting can however become broken.
+	 * Serialize with any parallel __split_huge_page_refcount() which
+	 * might otherwise copy PageMlocked to part of the tail pages before
+	 * we clear it in the head page. It also stabilizes hpage_nr_pages().
 	 */
+	spin_lock_irq(&zone->lru_lock);
+
+	nr_pages = hpage_nr_pages(page);
+	if (!TestClearPageMlocked(page))
+		goto unlock_out;
+
+	__mod_zone_page_state(zone, NR_MLOCK, -nr_pages);
+
+	if (__munlock_isolate_lru_page(page, true)) {
+		spin_unlock_irq(&zone->lru_lock);
+		__munlock_isolated_page(page);
+		goto out;
+	}
+	__munlock_isolation_failed(page);
+
+unlock_out:
+	spin_unlock_irq(&zone->lru_lock);
+
+out:
 	return nr_pages - 1;
 }
 
@@ -310,35 +336,25 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
 		struct page *page = pvec->pages[i];
 
 		if (TestClearPageMlocked(page)) {
-			struct lruvec *lruvec;
-			int lru;
-
-			if (PageLRU(page)) {
-				lruvec = mem_cgroup_page_lruvec(page, zone);
-				lru = page_lru(page);
-				/*
-				 * We already have pin from follow_page_mask()
-				 * so we can spare the get_page() here.
-				 */
-				ClearPageLRU(page);
-				del_page_from_lru_list(page, lruvec, lru);
-			} else {
-				__munlock_isolation_failed(page);
-				goto skip_munlock;
-			}
-
-		} else {
-skip_munlock:
 			/*
-			 * We won't be munlocking this page in the next phase
-			 * but we still need to release the follow_page_mask()
-			 * pin. We cannot do it under lru_lock however. If it's
-			 * the last pin, __page_cache_release would deadlock.
+			 * We already have pin from follow_page_mask()
+			 * so we can spare the get_page() here.
 			 */
-			pagevec_add(&pvec_putback, pvec->pages[i]);
-			pvec->pages[i] = NULL;
+			if (__munlock_isolate_lru_page(page, false))
+				continue;
+			else
+				__munlock_isolation_failed(page);
 		}
+
+		/*
+		 * We won't be munlocking this page in the next phase
+		 * but we still need to release the follow_page_mask()
+		 * pin. We cannot do it under lru_lock however. If it's
+		 * the last pin, __page_cache_release() would deadlock.
+		 */
+		pagevec_add(&pvec_putback, pvec->pages[i]);
+		pvec->pages[i] = NULL;
 	}
 	delta_munlocked = -nr + pagevec_count(&pvec_putback);
 	__mod_zone_page_state(zone, NR_MLOCK, delta_munlocked);
 	spin_unlock_irq(&zone->lru_lock);