
Commit d16dfc55 authored by Peter Zijlstra, committed by Linus Torvalds

mm: mmu_gather rework



Rework the existing mmu_gather infrastructure.

The direct purpose of these patches was to allow preemptible mmu_gather,
but even without that I think these patches provide an improvement to the
status quo.

The first 9 patches rework the mmu_gather infrastructure.  For review
purposes I've split them into generic and per-arch patches, with the last of
those a generic cleanup.

The next patch provides generic RCU page-table freeing, and the follow-up
is a patch converting s390 to use this.  I've also got 4 patches from
DaveM lined up (not included in this series) that use this to implement
gup_fast() for sparc64.

Then there is one patch that extends the generic mmu_gather batching.

After that follow the mm preemptibility patches, which make part of the mm
a lot more preemptible.  They convert i_mmap_lock and anon_vma->lock to
mutexes, which together with the mmu_gather rework makes mmu_gather
preemptible as well.

Making i_mmap_lock a mutex also enables a clean-up of the truncate code.

This also allows for preemptible mmu_notifiers, something that I think
XPMEM wants.

Furthermore, it removes the new and universally detested unmap_mutex.

This patch:

Remove the first obstacle towards a fully preemptible mmu_gather.

The current scheme assumes mmu_gather is always done with preemption
disabled and uses per-cpu storage for the page batches.  Change this to
try to allocate a page for batching and, in case of failure, use a small
on-stack array to make some progress.
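
A condensed sketch of the resulting initialization path (mirroring the
include/asm-generic/tlb.h hunk below; the single-CPU fast mode, need_flush
and the arch hooks from the actual hunk are omitted here):

	#define MMU_GATHER_BUNDLE	8

	struct mmu_gather {
		struct mm_struct	*mm;
		unsigned int		nr, max;
		unsigned int		fullmm;
		struct page		**pages;			/* current batch array */
		struct page		*local[MMU_GATHER_BUNDLE];	/* on-stack fallback */
	};

	static inline void
	tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, bool fullmm)
	{
		unsigned long addr;

		tlb->mm = mm;
		tlb->fullmm = fullmm;
		tlb->nr = 0;

		/* Start with the small on-stack array so we can always make progress, */
		tlb->pages = tlb->local;
		tlb->max = ARRAY_SIZE(tlb->local);

		/* then opportunistically upgrade to a full page worth of page pointers. */
		addr = __get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0);
		if (addr) {
			tlb->pages = (void *)addr;
			tlb->max = PAGE_SIZE / sizeof(struct page *);
		}
	}

Callers simply declare the gather on their stack and pass it by address, as
the fs/exec.c hunk below shows: tlb_gather_mmu(&tlb, mm, 0); ... ;
tlb_finish_mmu(&tlb, start, end);.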

Preemptible mmu_gather is desired in general and usable once i_mmap_lock
becomes a mutex.  Doing it before the mutex conversion saves us from
having to rework the code by moving the mmu_gather bits inside the
pte_lock.

Also avoid flushing the TLB batches from under the pte lock; this is
useful even without the i_mmap_lock conversion as it significantly reduces
pte lock hold times.
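
Concretely, zap_pte_range() now breaks out of the PTE lock when the batch
fills up and only then does the flush; condensed from the mm/memory.c hunk
below (the loop advance and zap_work checks are elided):

	again:
		pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
		do {
			/* ... clear the PTE, then queue the page ... */
			force_flush = !__tlb_remove_page(tlb, page);
			if (force_flush)
				break;		/* batch is full, stop scanning */
		} while (addr != end);
		pte_unmap_unlock(pte - 1, ptl);

		/*
		 * The expensive TLB invalidate and page freeing happen here,
		 * after the PTE lock has been dropped.
		 */
		if (force_flush) {
			force_flush = 0;
			tlb_flush_mmu(tlb);
			if (addr != end)
				goto again;
		}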

[akpm@linux-foundation.org: fix comment tpyo]
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Miller <davem@davemloft.net>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Tony Luck <tony.luck@intel.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Hugh Dickins <hughd@google.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Namhyung Kim <namhyung@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent d05f3169
fs/exec.c  +5 −5
@@ -600,7 +600,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
 	unsigned long length = old_end - old_start;
 	unsigned long new_start = old_start - shift;
 	unsigned long new_end = old_end - shift;
-	struct mmu_gather *tlb;
+	struct mmu_gather tlb;
 
 	BUG_ON(new_start > new_end);
 
@@ -626,12 +626,12 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
 		return -ENOMEM;
 
 	lru_add_drain();
-	tlb = tlb_gather_mmu(mm, 0);
+	tlb_gather_mmu(&tlb, mm, 0);
 	if (new_end > old_start) {
 		/*
 		 * when the old and new regions overlap clear from new_end.
 		 */
-		free_pgd_range(tlb, new_end, old_end, new_end,
+		free_pgd_range(&tlb, new_end, old_end, new_end,
 			vma->vm_next ? vma->vm_next->vm_start : 0);
 	} else {
 		/*
@@ -640,10 +640,10 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
 		 * have constraints on va-space that make this illegal (IA64) -
 		 * for the others its just a little faster.
 		 */
-		free_pgd_range(tlb, old_start, old_end, new_end,
+		free_pgd_range(&tlb, old_start, old_end, new_end,
 			vma->vm_next ? vma->vm_next->vm_start : 0);
 	}
-	tlb_finish_mmu(tlb, new_end, old_end);
+	tlb_finish_mmu(&tlb, new_end, old_end);
 
 	/*
 	 * Shrink the vma to just the new range.  Always succeeds.
include/asm-generic/tlb.h  +69 −27
@@ -5,6 +5,8 @@
  * Copyright 2001 Red Hat, Inc.
  * Based on code from mm/memory.c Copyright Linus Torvalds and others.
  *
+ * Copyright 2011 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
  * as published by the Free Software Foundation; either version
@@ -22,51 +24,71 @@
  * and page free order so much..
  */
 #ifdef CONFIG_SMP
-  #ifdef ARCH_FREE_PTR_NR
-    #define FREE_PTR_NR   ARCH_FREE_PTR_NR
-  #else
-    #define FREE_PTE_NR	506
-  #endif
   #define tlb_fast_mode(tlb) ((tlb)->nr == ~0U)
 #else
-  #define FREE_PTE_NR	1
   #define tlb_fast_mode(tlb) 1
 #endif
 
+/*
+ * If we can't allocate a page to make a big batch of page pointers
+ * to work on, then just handle a few from the on-stack structure.
+ */
+#define MMU_GATHER_BUNDLE	8
+
 /* struct mmu_gather is an opaque type used by the mm code for passing around
  * any data needed by arch specific code for tlb_remove_page.
  */
 struct mmu_gather {
 	struct mm_struct	*mm;
 	unsigned int		nr;	/* set to ~0U means fast mode */
+	unsigned int		max;	/* nr < max */
 	unsigned int		need_flush;/* Really unmapped some ptes? */
 	unsigned int		fullmm; /* non-zero means full mm flush */
-	struct page *		pages[FREE_PTE_NR];
+#ifdef HAVE_ARCH_MMU_GATHER
+	struct arch_mmu_gather	arch;
+#endif
+	struct page		**pages;
+	struct page		*local[MMU_GATHER_BUNDLE];
 };
 
-/* Users of the generic TLB shootdown code must declare this storage space. */
-DECLARE_PER_CPU(struct mmu_gather, mmu_gathers);
+static inline void __tlb_alloc_page(struct mmu_gather *tlb)
+{
+	unsigned long addr = __get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0);
+
+	if (addr) {
+		tlb->pages = (void *)addr;
+		tlb->max = PAGE_SIZE / sizeof(struct page *);
+	}
+}
 
 /* tlb_gather_mmu
- *	Return a pointer to an initialized struct mmu_gather.
+ *	Called to initialize an (on-stack) mmu_gather structure for page-table
+ *	tear-down from @mm. The @fullmm argument is used when @mm is without
+ *	users and we're going to destroy the full address space (exit/execve).
  */
-static inline struct mmu_gather *
-tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
+static inline void
+tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, bool fullmm)
 {
-	struct mmu_gather *tlb = &get_cpu_var(mmu_gathers);
-
 	tlb->mm = mm;
 
-	/* Use fast mode if only one CPU is online */
-	tlb->nr = num_online_cpus() > 1 ? 0U : ~0U;
+	tlb->max = ARRAY_SIZE(tlb->local);
+	tlb->pages = tlb->local;
+
+	if (num_online_cpus() > 1) {
+		tlb->nr = 0;
+		__tlb_alloc_page(tlb);
+	} else /* Use fast mode if only one CPU is online */
+		tlb->nr = ~0U;
 
-	tlb->fullmm = full_mm_flush;
+	tlb->fullmm = fullmm;
 
-	return tlb;
+#ifdef HAVE_ARCH_MMU_GATHER
+	tlb->arch = ARCH_MMU_GATHER_INIT;
+#endif
 }
 
 static inline void
-tlb_flush_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
+tlb_flush_mmu(struct mmu_gather *tlb)
 {
 	if (!tlb->need_flush)
 		return;
@@ -75,6 +97,13 @@ tlb_flush_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
 	if (!tlb_fast_mode(tlb)) {
 		free_pages_and_swap_cache(tlb->pages, tlb->nr);
 		tlb->nr = 0;
+		/*
+		 * If we are using the local on-stack array of pages for MMU
+		 * gather, try allocating an off-stack array again as we have
+		 * recently freed pages.
+		 */
+		if (tlb->pages == tlb->local)
+			__tlb_alloc_page(tlb);
 	}
 }
 
@@ -85,29 +114,42 @@ tlb_flush_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
 static inline void
 tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
 {
-	tlb_flush_mmu(tlb, start, end);
+	tlb_flush_mmu(tlb);
 
 	/* keep the page table cache within bounds */
 	check_pgt_cache();
 
-	put_cpu_var(mmu_gathers);
+	if (tlb->pages != tlb->local)
+		free_pages((unsigned long)tlb->pages, 0);
 }
 
-/* tlb_remove_page
+/* __tlb_remove_page
  *	Must perform the equivalent to __free_pte(pte_get_and_clear(ptep)), while
  *	handling the additional races in SMP caused by other CPUs caching valid
- *	mappings in their TLBs.
+ *	mappings in their TLBs. Returns the number of free page slots left.
+ *	When out of page slots we must call tlb_flush_mmu().
  */
-static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
 	tlb->need_flush = 1;
 	if (tlb_fast_mode(tlb)) {
 		free_page_and_swap_cache(page);
-		return;
+		return 1; /* avoid calling tlb_flush_mmu() */
 	}
 	tlb->pages[tlb->nr++] = page;
-	if (tlb->nr >= FREE_PTE_NR)
-		tlb_flush_mmu(tlb, 0, 0);
+	VM_BUG_ON(tlb->nr > tlb->max);
+
+	return tlb->max - tlb->nr;
+}
+
+/* tlb_remove_page
+ *	Similar to __tlb_remove_page but will call tlb_flush_mmu() itself when
+ *	required.
+ */
+static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+{
+	if (!__tlb_remove_page(tlb, page))
+		tlb_flush_mmu(tlb);
 }
 
 /**
include/linux/mm.h  +1 −1
@@ -906,7 +906,7 @@ int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
 		unsigned long size);
 unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
 		unsigned long size, struct zap_details *);
-unsigned long unmap_vmas(struct mmu_gather **tlb,
+unsigned long unmap_vmas(struct mmu_gather *tlb,
 		struct vm_area_struct *start_vma, unsigned long start_addr,
 		unsigned long end_addr, unsigned long *nr_accounted,
 		struct zap_details *);
mm/memory.c  +23 −23
@@ -912,12 +912,13 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 				long *zap_work, struct zap_details *details)
 {
 	struct mm_struct *mm = tlb->mm;
+	int force_flush = 0;
 	pte_t *pte;
 	spinlock_t *ptl;
 	int rss[NR_MM_COUNTERS];
 
 	init_rss_vec(rss);
 
+again:
 	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
 	arch_enter_lazy_mmu_mode();
 	do {
@@ -974,7 +975,9 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 			page_remove_rmap(page);
 			if (unlikely(page_mapcount(page) < 0))
 				print_bad_pte(vma, addr, ptent, page);
-			tlb_remove_page(tlb, page);
+			force_flush = !__tlb_remove_page(tlb, page);
+			if (force_flush)
+				break;
 			continue;
 		}
 		/*
@@ -1001,6 +1004,18 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 	arch_leave_lazy_mmu_mode();
 	pte_unmap_unlock(pte - 1, ptl);
 
+	/*
+	 * mmu_gather ran out of room to batch pages, we break out of
+	 * the PTE lock to avoid doing the potential expensive TLB invalidate
+	 * and page-free while holding it.
+	 */
+	if (force_flush) {
+		force_flush = 0;
+		tlb_flush_mmu(tlb);
+		if (addr != end)
+			goto again;
+	}
+
 	return addr;
 }
 
@@ -1121,17 +1136,14 @@ static unsigned long unmap_page_range(struct mmu_gather *tlb,
  * ensure that any thus-far unmapped pages are flushed before unmap_vmas()
  * drops the lock and schedules.
  */
-unsigned long unmap_vmas(struct mmu_gather **tlbp,
+unsigned long unmap_vmas(struct mmu_gather *tlb,
 		struct vm_area_struct *vma, unsigned long start_addr,
 		unsigned long end_addr, unsigned long *nr_accounted,
 		struct zap_details *details)
 {
 	long zap_work = ZAP_BLOCK_SIZE;
-	unsigned long tlb_start = 0;	/* For tlb_finish_mmu */
-	int tlb_start_valid = 0;
 	unsigned long start = start_addr;
 	spinlock_t *i_mmap_lock = details? details->i_mmap_lock: NULL;
-	int fullmm = (*tlbp)->fullmm;
 	struct mm_struct *mm = vma->vm_mm;
 
 	mmu_notifier_invalidate_range_start(mm, start_addr, end_addr);
@@ -1152,11 +1164,6 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
 			untrack_pfn_vma(vma, 0, 0);
 
 		while (start != end) {
-			if (!tlb_start_valid) {
-				tlb_start = start;
-				tlb_start_valid = 1;
-			}
-
 			if (unlikely(is_vm_hugetlb_page(vma))) {
 				/*
 				 * It is undesirable to test vma->vm_file as it
@@ -1177,7 +1184,7 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
 
 				start = end;
 			} else
-				start = unmap_page_range(*tlbp, vma,
+				start = unmap_page_range(tlb, vma,
 						start, end, &zap_work, details);
 
 			if (zap_work > 0) {
@@ -1185,19 +1192,13 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
 				break;
 			}
 
-			tlb_finish_mmu(*tlbp, tlb_start, start);
-
 			if (need_resched() ||
 				(i_mmap_lock && spin_needbreak(i_mmap_lock))) {
-				if (i_mmap_lock) {
-					*tlbp = NULL;
+				if (i_mmap_lock)
 					goto out;
-				}
 				cond_resched();
 			}
 
-			*tlbp = tlb_gather_mmu(vma->vm_mm, fullmm);
-			tlb_start_valid = 0;
 			zap_work = ZAP_BLOCK_SIZE;
 		}
 	}
@@ -1217,16 +1218,15 @@ unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
 		unsigned long size, struct zap_details *details)
 {
 	struct mm_struct *mm = vma->vm_mm;
-	struct mmu_gather *tlb;
+	struct mmu_gather tlb;
 	unsigned long end = address + size;
 	unsigned long nr_accounted = 0;
 
 	lru_add_drain();
-	tlb = tlb_gather_mmu(mm, 0);
+	tlb_gather_mmu(&tlb, mm, 0);
 	update_hiwater_rss(mm);
 	end = unmap_vmas(&tlb, vma, address, end, &nr_accounted, details);
-	if (tlb)
-		tlb_finish_mmu(tlb, address, end);
+	tlb_finish_mmu(&tlb, address, end);
 	return end;
 }
 
mm/mmap.c  +9 −9
@@ -1903,17 +1903,17 @@ static void unmap_region(struct mm_struct *mm,
 		unsigned long start, unsigned long end)
 {
 	struct vm_area_struct *next = prev? prev->vm_next: mm->mmap;
-	struct mmu_gather *tlb;
+	struct mmu_gather tlb;
 	unsigned long nr_accounted = 0;
 
 	lru_add_drain();
-	tlb = tlb_gather_mmu(mm, 0);
+	tlb_gather_mmu(&tlb, mm, 0);
 	update_hiwater_rss(mm);
 	unmap_vmas(&tlb, vma, start, end, &nr_accounted, NULL);
 	vm_unacct_memory(nr_accounted);
-	free_pgtables(tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
+	free_pgtables(&tlb, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
 				 next ? next->vm_start : 0);
-	tlb_finish_mmu(tlb, start, end);
+	tlb_finish_mmu(&tlb, start, end);
 }
 
 /*
@@ -2255,7 +2255,7 @@ EXPORT_SYMBOL(do_brk);
 /* Release all mmaps. */
 void exit_mmap(struct mm_struct *mm)
 {
-	struct mmu_gather *tlb;
+	struct mmu_gather tlb;
 	struct vm_area_struct *vma;
 	unsigned long nr_accounted = 0;
 	unsigned long end;
@@ -2280,14 +2280,14 @@ void exit_mmap(struct mm_struct *mm)
 
 	lru_add_drain();
 	flush_cache_mm(mm);
-	tlb = tlb_gather_mmu(mm, 1);
+	tlb_gather_mmu(&tlb, mm, 1);
 	/* update_hiwater_rss(mm) here? but nobody should be looking */
 	/* Use -1 here to ensure all VMAs in the mm are unmapped */
 	end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL);
 	vm_unacct_memory(nr_accounted);
 
-	free_pgtables(tlb, vma, FIRST_USER_ADDRESS, 0);
-	tlb_finish_mmu(tlb, 0, end);
+	free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0);
+	tlb_finish_mmu(&tlb, 0, end);
 
 	/*
 	 * Walk the list again, actually closing and freeing it,