Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip (203b4fc9) · Commits · e / devices / android_kernel_fairphone_FP4

arch/arm/include/asm/tlb.h

+8 −0

Original line number	Diff line number	Diff line
		@@ -292,5 +292,13 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
		{
		}

		/* Empty implementation: the body does nothing and @mm is unused. */
		static inline void tlb_flush_remove_tables(struct mm_struct *mm)
		{
		}

		/* Empty implementation: the body does nothing and @arg is unused. */
		static inline void tlb_flush_remove_tables_local(void *arg)
		{
		}

		#endif /* CONFIG_MMU */
		#endif

arch/arm64/mm/mmu.c

+2 −2

Original line number	Diff line number	Diff line
		@@ -977,12 +977,12 @@ int pmd_clear_huge(pmd_t *pmdp)
		return 1;
		}

		int pud_free_pmd_page(pud_t *pud)
		int pud_free_pmd_page(pud_t *pud, unsigned long addr)
		{
		return pud_none(*pud);
		}

		int pmd_free_pte_page(pmd_t *pmd)
		int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
		{
		return pmd_none(*pmd);
		}

arch/x86/include/asm/tlbflush.h

+5 −16

Original line number	Diff line number	Diff line
		@@ -148,22 +148,6 @@ static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
		#define __flush_tlb_one_user(addr) __native_flush_tlb_one_user(addr)
		#endif

		static inline bool tlb_defer_switch_to_init_mm(void)
		{
		/*
		* If we have PCID, then switching to init_mm is reasonably
		* fast. If we don't have PCID, then switching to init_mm is
		* quite slow, so we try to defer it in the hopes that we can
		* avoid it entirely. The latter approach runs the risk of
		* receiving otherwise unnecessary IPIs.
		*
		* This choice is just a heuristic. The tlb code can handle this
		* function returning true or false regardless of whether we have
		* PCID.
		*/
		return !static_cpu_has(X86_FEATURE_PCID);
		}

		struct tlb_context {
		u64 ctx_id;
		u64 tlb_gen;
		@@ -554,4 +538,9 @@ extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);
		native_flush_tlb_others(mask, info)
		#endif

		extern void tlb_flush_remove_tables(struct mm_struct *mm);
		extern void tlb_flush_remove_tables_local(void *arg);

		#define HAVE_TLB_FLUSH_REMOVE_TABLES

		#endif /* _ASM_X86_TLBFLUSH_H */

arch/x86/mm/pgtable.c

+53 −11

Original line number	Diff line number	Diff line
		@@ -329,9 +329,6 @@ static int __init pgd_cache_init(void)
		*/
		pgd_cache = kmem_cache_create("pgd_cache", PGD_SIZE, PGD_ALIGN,
		SLAB_PANIC, NULL);
		if (!pgd_cache)
		return -ENOMEM;

		return 0;
		}
		core_initcall(pgd_cache_init);
		@@ -719,28 +716,50 @@ int pmd_clear_huge(pmd_t *pmd)
		return 0;
		}

		#ifdef CONFIG_X86_64
		/**
		* pud_free_pmd_page - Clear pud entry and free pmd page.
		* @pud: Pointer to a PUD.
		* @addr: Virtual address associated with pud.
		*
		* Context: The pud range has been unmaped and TLB purged.
		* Context: The pud range has been unmapped and TLB purged.
		* Return: 1 if clearing the entry succeeded. 0 otherwise.
		*
		* NOTE: Callers must allow a single page allocation.
		*/
		int pud_free_pmd_page(pud_t *pud)
		int pud_free_pmd_page(pud_t *pud, unsigned long addr)
		{
		pmd_t *pmd;
		pmd_t *pmd, *pmd_sv;
		pte_t *pte;
		int i;

		if (pud_none(*pud))
		return 1;

		pmd = (pmd_t *)pud_page_vaddr(*pud);

		for (i = 0; i < PTRS_PER_PMD; i++)
		if (!pmd_free_pte_page(&pmd[i]))
		pmd_sv = (pmd_t *)__get_free_page(GFP_KERNEL);
		if (!pmd_sv)
		return 0;

		for (i = 0; i < PTRS_PER_PMD; i++) {
		pmd_sv[i] = pmd[i];
		if (!pmd_none(pmd[i]))
		pmd_clear(&pmd[i]);
		}

		pud_clear(pud);

		/* INVLPG to clear all paging-structure caches */
		flush_tlb_kernel_range(addr, addr + PAGE_SIZE-1);

		for (i = 0; i < PTRS_PER_PMD; i++) {
		if (!pmd_none(pmd_sv[i])) {
		pte = (pte_t *)pmd_page_vaddr(pmd_sv[i]);
		free_page((unsigned long)pte);
		}
		}

		free_page((unsigned long)pmd_sv);
		free_page((unsigned long)pmd);

		return 1;
		@@ -749,11 +768,12 @@ int pud_free_pmd_page(pud_t *pud)
		/**
		* pmd_free_pte_page - Clear pmd entry and free pte page.
		* @pmd: Pointer to a PMD.
		* @addr: Virtual address associated with pmd.
		*
		* Context: The pmd range has been unmaped and TLB purged.
		* Context: The pmd range has been unmapped and TLB purged.
		* Return: 1 if clearing the entry succeeded. 0 otherwise.
		*/
		int pmd_free_pte_page(pmd_t *pmd)
		int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
		{
		pte_t *pte;

		@@ -762,8 +782,30 @@ int pmd_free_pte_page(pmd_t *pmd)

		pte = (pte_t *)pmd_page_vaddr(*pmd);
		pmd_clear(pmd);

		/* INVLPG to clear all paging-structure caches */
		flush_tlb_kernel_range(addr, addr + PAGE_SIZE-1);

		free_page((unsigned long)pte);

		return 1;
		}

		#else /* !CONFIG_X86_64 */

		/*
		 * Variant built when CONFIG_X86_64 is not set: nothing is cleared or
		 * freed here. The result is simply whether the pud entry is already
		 * empty; @addr is unused.
		 */
		int pud_free_pmd_page(pud_t *pud, unsigned long addr)
		{
			int entry_is_none = pud_none(*pud);

			return entry_is_none;
		}

		/*
		 * Free page handling is disabled on x86-PAE so that ioremap() never
		 * updates sync'd pmd entries (see vmalloc_sync_one()). Nothing is
		 * cleared or freed here: the result is whether the pmd entry is
		 * already empty, and @addr is unused.
		 */
		int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
		{
			int entry_is_none = pmd_none(*pmd);

			return entry_is_none;
		}

		#endif /* CONFIG_X86_64 */
		#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */

arch/x86/mm/tlb.c

+165 −59

Original line number	Diff line number	Diff line
		@@ -7,6 +7,7 @@
		#include <linux/export.h>
		#include <linux/cpu.h>
		#include <linux/debugfs.h>
		#include <linux/gfp.h>

		#include <asm/tlbflush.h>
		#include <asm/mmu_context.h>
		@@ -35,7 +36,7 @@
		* necessary invalidation by clearing out the 'ctx_id' which
		* forces a TLB flush when the context is loaded.
		*/
		void clear_asid_other(void)
		static void clear_asid_other(void)
		{
		u16 asid;

		@@ -185,8 +186,11 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
		{
		struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
		u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
		bool was_lazy = this_cpu_read(cpu_tlbstate.is_lazy);
		unsigned cpu = smp_processor_id();
		u64 next_tlb_gen;
		bool need_flush;
		u16 new_asid;

		/*
		* NB: The scheduler will call us with prev == next when switching
		@@ -240,20 +244,41 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
		next->context.ctx_id);

		/*
		* We don't currently support having a real mm loaded without
		* our cpu set in mm_cpumask(). We have all the bookkeeping
		* in place to figure out whether we would need to flush
		* if our cpu were cleared in mm_cpumask(), but we don't
		* currently use it.
		* Even in lazy TLB mode, the CPU should stay set in the
		* mm_cpumask. The TLB shootdown code can figure out from
		* cpu_tlbstate.is_lazy whether or not to send an IPI.
		*/
		if (WARN_ON_ONCE(real_prev != &init_mm &&
		!cpumask_test_cpu(cpu, mm_cpumask(next))))
		cpumask_set_cpu(cpu, mm_cpumask(next));

		/*
		* If the CPU is not in lazy TLB mode, we are just switching
		* from one thread in a process to another thread in the same
		* process. No TLB flush required.
		*/
		if (!was_lazy)
		return;

		/*
		* Read the tlb_gen to check whether a flush is needed.
		* If the TLB is up to date, just use it.
		* The barrier synchronizes with the tlb_gen increment in
		* the TLB shootdown code.
		*/
		smp_mb();
		next_tlb_gen = atomic64_read(&next->context.tlb_gen);
		if (this_cpu_read(cpu_tlbstate.ctxs[prev_asid].tlb_gen) ==
		next_tlb_gen)
		return;

		/*
		* TLB contents went out of date while we were in lazy
		* mode. Fall through to the TLB switching code below.
		*/
		new_asid = prev_asid;
		need_flush = true;
		} else {
		u16 new_asid;
		bool need_flush;
		u64 last_ctx_id = this_cpu_read(cpu_tlbstate.last_ctx_id);

		/*
		@@ -285,18 +310,26 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
		sync_current_stack_to_mm(next);
		}

		/* Stop remote flushes for the previous mm */
		VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(real_prev)) &&
		real_prev != &init_mm);
		/*
		* Stop remote flushes for the previous mm.
		* Skip kernel threads; we never send init_mm TLB flushing IPIs,
		* but the bitmap manipulation can cause cache line contention.
		*/
		if (real_prev != &init_mm) {
		VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu,
		mm_cpumask(real_prev)));
		cpumask_clear_cpu(cpu, mm_cpumask(real_prev));
		}

		/*
		* Start remote flushes and then read tlb_gen.
		*/
		if (next != &init_mm)
		cpumask_set_cpu(cpu, mm_cpumask(next));
		next_tlb_gen = atomic64_read(&next->context.tlb_gen);

		choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush);
		}

		if (need_flush) {
		this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
		@@ -330,7 +363,6 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,

		this_cpu_write(cpu_tlbstate.loaded_mm, next);
		this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);
		}

		load_mm_cr4(next);
		switch_ldt(real_prev, next);
		@@ -354,20 +386,7 @@ void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
		if (this_cpu_read(cpu_tlbstate.loaded_mm) == &init_mm)
		return;

		if (tlb_defer_switch_to_init_mm()) {
		/*
		* There's a significant optimization that may be possible
		* here. We have accurate enough TLB flush tracking that we
		* don't need to maintain coherence of TLB per se when we're
		* lazy. We do, however, need to maintain coherence of
		* paging-structure caches. We could, in principle, leave our
		* old mm loaded and only switch to init_mm when
		* tlb_remove_page() happens.
		*/
		this_cpu_write(cpu_tlbstate.is_lazy, true);
		} else {
		switch_mm(NULL, &init_mm, NULL);
		}
		}

		/*
		@@ -454,6 +473,9 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
		* paging-structure cache to avoid speculatively reading
		* garbage into our TLB. Since switching to init_mm is barely
		* slower than a minimal flush, just switch to init_mm.
		*
		* This should be rare, with native_flush_tlb_others skipping
		* IPIs to lazy TLB mode CPUs.
		*/
		switch_mm_irqs_off(NULL, &init_mm, NULL);
		return;
		@@ -560,6 +582,9 @@ static void flush_tlb_func_remote(void *info)
		void native_flush_tlb_others(const struct cpumask *cpumask,
		const struct flush_tlb_info *info)
		{
		cpumask_var_t lazymask;
		unsigned int cpu;

		count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
		if (info->end == TLB_FLUSH_ALL)
		trace_tlb_flush(TLB_REMOTE_SEND_IPI, TLB_FLUSH_ALL);
		@@ -583,8 +608,6 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
		* that UV should be updated so that smp_call_function_many(),
		* etc, are optimal on UV.
		*/
		unsigned int cpu;

		cpu = smp_processor_id();
		cpumask = uv_flush_tlb_others(cpumask, info);
		if (cpumask)
		@@ -592,8 +615,29 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
		(void *)info, 1);
		return;
		}

		/*
		* A temporary cpumask is used in order to skip sending IPIs
		* to CPUs in lazy TLB state, while keeping them in mm_cpumask(mm).
		* If the allocation fails, simply IPI every CPU in mm_cpumask.
		*/
		if (!alloc_cpumask_var(&lazymask, GFP_ATOMIC)) {
		smp_call_function_many(cpumask, flush_tlb_func_remote,
		(void *)info, 1);
		return;
		}

		cpumask_copy(lazymask, cpumask);

		for_each_cpu(cpu, lazymask) {
		if (per_cpu(cpu_tlbstate.is_lazy, cpu))
		cpumask_clear_cpu(cpu, lazymask);
		}

		smp_call_function_many(lazymask, flush_tlb_func_remote,
		(void *)info, 1);

		free_cpumask_var(lazymask);
		}

		/*
		@@ -646,6 +690,68 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
		put_cpu();
		}

		/*
		 * Per-CPU callback: @arg is the mm_struct whose page tables are being
		 * freed. Only acts when this CPU still has that mm loaded and is in
		 * lazy TLB mode; otherwise it returns without doing anything.
		 */
		void tlb_flush_remove_tables_local(void *arg)
		{
			struct mm_struct *target_mm = arg;

			/* A different mm is loaded on this CPU: nothing to do. */
			if (this_cpu_read(cpu_tlbstate.loaded_mm) != target_mm)
				return;

			/* Not in lazy mode: nothing to do here either. */
			if (!this_cpu_read(cpu_tlbstate.is_lazy))
				return;

			/*
			 * Lazy mode with the target mm loaded. The paging-structure
			 * cache must at least be flushed so that garbage is not
			 * speculatively read into the TLB. Switching to init_mm is
			 * barely slower than a minimal flush, so do that.
			 */
			switch_mm_irqs_off(NULL, &init_mm, NULL);
		}

		/*
		 * mm_fill_lazy_tlb_cpu_mask - collect the CPUs of @mm that are in lazy TLB mode.
		 * @mm: address space whose mm_cpumask() is scanned.
		 * @lazy_cpus: output mask. Bits are only ever set here, never cleared, so
		 *             the caller must pass it in zeroed.
		 *
		 * The test was previously inverted (!is_lazy), which filled the mask with
		 * the non-lazy CPUs. The IPI handler tlb_flush_remove_tables_local() only
		 * acts on CPUs that are lazy, so the lazy CPUs this mask is named for were
		 * never notified.
		 */
		static void mm_fill_lazy_tlb_cpu_mask(struct mm_struct *mm,
						      struct cpumask *lazy_cpus)
		{
			int cpu;

			for_each_cpu(cpu, mm_cpumask(mm)) {
				/* Select lazy CPUs only; non-lazy ones need no IPI here. */
				if (per_cpu(cpu_tlbstate.is_lazy, cpu))
					cpumask_set_cpu(cpu, lazy_cpus);
			}
		}

		/*
		 * tlb_flush_remove_tables - run tlb_flush_remove_tables_local() on other CPUs
		 * before page tables of @mm are freed.
		 * @mm: address space whose page tables are about to be removed.
		 *
		 * Runs between get_cpu() and put_cpu(); every path leaves through the
		 * single put_cpu() at the end.
		 */
		void tlb_flush_remove_tables(struct mm_struct *mm)
		{
			cpumask_var_t lazy_cpus;
			int this_cpu = get_cpu();

			/* No CPU other than this one is set in mm_cpumask(mm). */
			if (cpumask_any_but(mm_cpumask(mm), this_cpu) >= nr_cpu_ids)
				goto out;

			if (!zalloc_cpumask_var(&lazy_cpus, GFP_ATOMIC)) {
				/*
				 * No memory for the temporary mask: fall back to a
				 * brute force call on every CPU in mm_cpumask(mm).
				 */
				smp_call_function_many(mm_cpumask(mm),
						       tlb_flush_remove_tables_local,
						       (void *)mm, 1);
				goto out;
			}

			/*
			 * CPUs with !is_lazy either received a TLB flush IPI while the
			 * user pages in this address range were unmapped, or have
			 * context switched and reloaded %CR3 since then.
			 *
			 * Shootdown IPIs at page table freeing time only need to be
			 * sent to CPUs that may have out of date TLB contents.
			 */
			mm_fill_lazy_tlb_cpu_mask(mm, lazy_cpus);
			smp_call_function_many(lazy_cpus,
					       tlb_flush_remove_tables_local,
					       (void *)mm, 1);
			free_cpumask_var(lazy_cpus);

		out:
			put_cpu();
		}

		static void do_flush_tlb_all(void *info)
		{