Merge branch 'sh/cachetlb' (5e3679c5) · Commits · e / devices / android_kernel_fairphone_FP4

arch/sh/include/asm/fixmap.h

+3 −3

Original line number	Diff line number	Diff line
		@@ -14,9 +14,9 @@
		#define _ASM_FIXMAP_H

		#include <linux/kernel.h>
		#include <linux/threads.h>
		#include <asm/page.h>
		#ifdef CONFIG_HIGHMEM
		#include <linux/threads.h>
		#include <asm/kmap_types.h>
		#endif

		@@ -46,9 +46,9 @@
		* fix-mapped?
		*/
		enum fixed_addresses {
		#define FIX_N_COLOURS 16
		#define FIX_N_COLOURS 8
		FIX_CMAP_BEGIN,
		FIX_CMAP_END = FIX_CMAP_BEGIN + FIX_N_COLOURS,
		FIX_CMAP_END = FIX_CMAP_BEGIN + (FIX_N_COLOURS * NR_CPUS),
		FIX_UNCACHED,
		#ifdef CONFIG_HIGHMEM
		FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */

arch/sh/mm/cache-sh4.c

+75 −421

Original line number	Diff line number	Diff line
		@@ -2,7 +2,7 @@
		* arch/sh/mm/cache-sh4.c
		*
		* Copyright (C) 1999, 2000, 2002 Niibe Yutaka
		* Copyright (C) 2001 - 2007 Paul Mundt
		* Copyright (C) 2001 - 2009 Paul Mundt
		* Copyright (C) 2003 Richard Curnow
		* Copyright (c) 2007 STMicroelectronics (R&D) Ltd.
		*
		@@ -15,6 +15,8 @@
		#include <linux/io.h>
		#include <linux/mutex.h>
		#include <linux/fs.h>
		#include <linux/highmem.h>
		#include <asm/pgtable.h>
		#include <asm/mmu_context.h>
		#include <asm/cacheflush.h>

		@@ -23,20 +25,11 @@
		* flushing. Anything exceeding this will simply flush the dcache in its
		* entirety.
		*/
		#define MAX_DCACHE_PAGES 64 /* XXX: Tune for ways */
		#define MAX_ICACHE_PAGES 32

		static void __flush_cache_4096(unsigned long addr, unsigned long phys,
		unsigned long exec_offset);

		/*
		* This is initialised here to ensure that it is not placed in the BSS. If
		* that were to happen, note that cache_init gets called before the BSS is
		* cleared, so this would get nulled out which would be hopeless.
		*/
		static void (*__flush_dcache_segment_fn)(unsigned long, unsigned long) =
		(void (*)(unsigned long, unsigned long))0xdeadbeef;

		/*
		* Write back the range of D-cache, and purge the I-cache.
		*
		@@ -123,12 +116,12 @@ static void sh4_flush_dcache_page(void *arg)
		else
		#endif
		{
		unsigned long phys = PHYSADDR(page_address(page));
		unsigned long phys = page_to_phys(page);
		unsigned long addr = CACHE_OC_ADDRESS_ARRAY;
		int i, n;

		/* Loop all the D-cache */
		n = boot_cpu_data.dcache.n_aliases;
		n = boot_cpu_data.dcache.way_incr >> 12;
		for (i = 0; i < n; i++, addr += 4096)
		flush_cache_4096(addr, phys);
		}
		@@ -158,90 +151,33 @@ static void __uses_jump_to_uncached flush_icache_all(void)
		local_irq_restore(flags);
		}

		static inline void flush_dcache_all(void)
		static void flush_dcache_all(void)
		{
		(*__flush_dcache_segment_fn)(0UL, boot_cpu_data.dcache.way_size);
		wmb();
		}

		/*
		* Per-CPU "flush everything" handler: calls flush_dcache_all() and then
		* flush_icache_all(). The argument is not used. sh4_cache_init() installs
		* this function as local_flush_cache_all.
		*/
		static void sh4_flush_cache_all(void *unused)
		{
		flush_dcache_all();
		flush_icache_all();
		}

		static void __flush_cache_mm(struct mm_struct *mm, unsigned long start,
		unsigned long end)
		{
		unsigned long d = 0, p = start & PAGE_MASK;
		unsigned long alias_mask = boot_cpu_data.dcache.alias_mask;
		unsigned long n_aliases = boot_cpu_data.dcache.n_aliases;
		unsigned long select_bit;
		unsigned long all_aliases_mask;
		unsigned long addr_offset;
		pgd_t *dir;
		pmd_t *pmd;
		pud_t *pud;
		pte_t *pte;
		int i;
		unsigned long addr, end_addr, entry_offset;

		dir = pgd_offset(mm, p);
		pud = pud_offset(dir, p);
		pmd = pmd_offset(pud, p);
		end = PAGE_ALIGN(end);
		end_addr = CACHE_OC_ADDRESS_ARRAY +
		(current_cpu_data.dcache.sets <<
		current_cpu_data.dcache.entry_shift) *
		current_cpu_data.dcache.ways;

		all_aliases_mask = (1 << n_aliases) - 1;
		entry_offset = 1 << current_cpu_data.dcache.entry_shift;

		do {
		if (pmd_none(pmd) \|\| unlikely(pmd_bad(pmd))) {
		p &= PMD_MASK;
		p += PMD_SIZE;
		pmd++;

		continue;
		}

		pte = pte_offset_kernel(pmd, p);

		do {
		unsigned long phys;
		pte_t entry = *pte;

		if (!(pte_val(entry) & _PAGE_PRESENT)) {
		pte++;
		p += PAGE_SIZE;
		continue;
		}

		phys = pte_val(entry) & PTE_PHYS_MASK;

		if ((p ^ phys) & alias_mask) {
		d \|= 1 << ((p & alias_mask) >> PAGE_SHIFT);
		d \|= 1 << ((phys & alias_mask) >> PAGE_SHIFT);

		if (d == all_aliases_mask)
		goto loop_exit;
		for (addr = CACHE_OC_ADDRESS_ARRAY; addr < end_addr; ) {
		__raw_writel(0, addr); addr += entry_offset;
		__raw_writel(0, addr); addr += entry_offset;
		__raw_writel(0, addr); addr += entry_offset;
		__raw_writel(0, addr); addr += entry_offset;
		__raw_writel(0, addr); addr += entry_offset;
		__raw_writel(0, addr); addr += entry_offset;
		__raw_writel(0, addr); addr += entry_offset;
		__raw_writel(0, addr); addr += entry_offset;
		}

		pte++;
		p += PAGE_SIZE;
		} while (p < end && ((unsigned long)pte & ~PAGE_MASK));
		pmd++;
		} while (p < end);

		loop_exit:
		addr_offset = 0;
		select_bit = 1;

		for (i = 0; i < n_aliases; i++) {
		if (d & select_bit) {
		(*__flush_dcache_segment_fn)(addr_offset, PAGE_SIZE);
		wmb();
		}

		select_bit <<= 1;
		addr_offset += PAGE_SIZE;
		}
		/*
		* Per-CPU "flush everything" handler: calls flush_dcache_all() and then
		* flush_icache_all(). The argument is not used. sh4_cache_init() installs
		* this function as local_flush_cache_all.
		*/
		static void sh4_flush_cache_all(void *unused)
		{
		flush_dcache_all();
		flush_icache_all();
		}

		/*
		@@ -250,9 +186,7 @@ static void __flush_cache_mm(struct mm_struct *mm, unsigned long start,
		* D-cache. The assumption elsewhere, e.g. flush_cache_range, is that
		* lines can stay resident so long as the virtual address they were
		* accessed with (hence cache set) is in accord with the physical
		* address (i.e. tag). It's no different here. So I reckon we don't
		* need to flush the I-cache, since aliases don't matter for that. We
		* should try that.
		* address (i.e. tag). It's no different here.
		*
		* Caller takes mm->mmap_sem.
		*/
		@@ -263,33 +197,7 @@ static void sh4_flush_cache_mm(void *arg)
		if (cpu_context(smp_processor_id(), mm) == NO_CONTEXT)
		return;

		/*
		* If cache is only 4k-per-way, there are never any 'aliases'. Since
		* the cache is physically tagged, the data can just be left in there.
		*/
		if (boot_cpu_data.dcache.n_aliases == 0)
		return;

		/*
		* Don't bother groveling around the dcache for the VMA ranges
		* if there are too many PTEs to make it worthwhile.
		*/
		if (mm->nr_ptes >= MAX_DCACHE_PAGES)
		flush_dcache_all();
		else {
		struct vm_area_struct *vma;

		/*
		* In this case there are reasonably sized ranges to flush,
		* iterate through the VMA list and take care of any aliases.
		*/
		for (vma = mm->mmap; vma; vma = vma->vm_next)
		__flush_cache_mm(mm, vma->vm_start, vma->vm_end);
		}

		/* Only touch the icache if one of the VMAs has VM_EXEC set. */
		if (mm->exec_vm)
		flush_icache_all();
		}

		/*
		@@ -302,44 +210,64 @@ static void sh4_flush_cache_page(void *args)
		{
		struct flusher_data *data = args;
		struct vm_area_struct *vma;
		struct page *page;
		unsigned long address, pfn, phys;
		unsigned int alias_mask;
		int map_coherent = 0;
		pgd_t *pgd;
		pud_t *pud;
		pmd_t *pmd;
		pte_t *pte;
		void *vaddr;

		vma = data->vma;
		address = data->addr1;
		pfn = data->addr2;
		phys = pfn << PAGE_SHIFT;
		page = pfn_to_page(pfn);

		if (cpu_context(smp_processor_id(), vma->vm_mm) == NO_CONTEXT)
		return;

		alias_mask = boot_cpu_data.dcache.alias_mask;

		/* We only need to flush D-cache when we have alias */
		if ((address^phys) & alias_mask) {
		/* Loop 4K of the D-cache */
		flush_cache_4096(
		CACHE_OC_ADDRESS_ARRAY \| (address & alias_mask),
		phys);
		/* Loop another 4K of the D-cache */
		flush_cache_4096(
		CACHE_OC_ADDRESS_ARRAY \| (phys & alias_mask),
		phys);
		}
		address &= PAGE_MASK;
		pgd = pgd_offset(vma->vm_mm, address);
		pud = pud_offset(pgd, address);
		pmd = pmd_offset(pud, address);
		pte = pte_offset_kernel(pmd, address);

		/* If the page isn't present, there is nothing to do here. */
		if (!(pte_val(*pte) & _PAGE_PRESENT))
		return;

		alias_mask = boot_cpu_data.icache.alias_mask;
		if (vma->vm_flags & VM_EXEC) {
		if ((vma->vm_mm == current->active_mm))
		vaddr = NULL;
		else {
		/*
		* Evict entries from the portion of the cache from which code
		* may have been executed at this address (virtual). There's
		* no need to evict from the portion corresponding to the
		* physical address as for the D-cache, because we know the
		* kernel has never executed the code through its identity
		* translation.
		* Use kmap_coherent or kmap_atomic to do flushes for
		* another ASID than the current one.
		*/
		flush_cache_4096(
		CACHE_IC_ADDRESS_ARRAY \| (address & alias_mask),
		phys);
		map_coherent = (current_cpu_data.dcache.n_aliases &&
		!test_bit(PG_dcache_dirty, &page->flags) &&
		page_mapped(page));
		if (map_coherent)
		vaddr = kmap_coherent(page, address);
		else
		vaddr = kmap_atomic(page, KM_USER0);

		address = (unsigned long)vaddr;
		}

		if (pages_do_alias(address, phys))
		flush_cache_4096(CACHE_OC_ADDRESS_ARRAY \|
		(address & shm_align_mask), phys);

		if (vma->vm_flags & VM_EXEC)
		flush_icache_all();

		if (vaddr) {
		if (map_coherent)
		kunmap_coherent(vaddr);
		else
		kunmap_atomic(vaddr, KM_USER0);
		}
		}

		@@ -372,25 +300,11 @@ static void sh4_flush_cache_range(void *args)
		if (boot_cpu_data.dcache.n_aliases == 0)
		return;

		/*
		* Don't bother with the lookup and alias check if we have a
		* wide range to cover, just blow away the dcache in its
		* entirety instead. -- PFM.
		*/
		if (((end - start) >> PAGE_SHIFT) >= MAX_DCACHE_PAGES)
		flush_dcache_all();
		else
		__flush_cache_mm(vma->vm_mm, start, end);

		if (vma->vm_flags & VM_EXEC) {
		/*
		* TODO: Is this required??? Need to look at how I-cache
		* coherency is assured when new programs are loaded to see if
		* this matters.
		*/
		if (vma->vm_flags & VM_EXEC)
		flush_icache_all();
		}
		}

		/**
		* __flush_cache_4096
		@@ -443,7 +357,7 @@ static void __flush_cache_4096(unsigned long addr, unsigned long phys,
		* pointless head-of-loop check for 0 iterations.
		*/
		do {
		ea = base_addr + PAGE_SIZE;
		ea = base_addr + 4096;
		a = base_addr;
		p = phys;

		@@ -463,245 +377,6 @@ static void __flush_cache_4096(unsigned long addr, unsigned long phys,
		} while (--way_count != 0);
		}

		/*
		* Break the 1, 2 and 4 way variants of this out into separate functions to
		* avoid nearly all the overhead of having the conditional stuff in the function
		* bodies (+ the 1 and 2 way cases avoid saving any registers too).
		*
		* We want to eliminate unnecessary bus transactions, so this code uses
		* a non-obvious technique.
		*
		* Loop over a cache way sized block of memory, one cache line at a time.
		* For each line, use movca.l to cause the current cache line contents to be
		* written back, but without reading anything from main memory. However this
		* has the side effect that the cache is now caching that memory location. So
		* follow this with a cache invalidate to mark the cache line invalid. And do
		* all this with interrupts disabled, to avoid the cache line being
		* accidentally evicted while it is holding garbage.
		*
		* This also breaks in a number of circumstances:
		* - if there are modifications to the region of memory just above
		* empty_zero_page (for example because a breakpoint has been placed
		* there), then these can be lost.
		*
		* This is because the memory address which the cache temporarily
		* caches in the above description is empty_zero_page. So the
		* movca.l hits the cache (it is assumed that it misses, or at least
		* isn't dirty), modifies the line and then invalidates it, losing the
		* required change.
		*
		* - If caches are disabled or configured in write-through mode, then
		* the movca.l writes garbage directly into memory.
		*/
		/*
		* D-cache segment flush used when the cache is in write-through mode
		* (sh4_cache_init() selects it when CCR_CACHE_WT is set). For every line
		* offset in the requested extent it writes 0 to the matching entry of each
		* way in the OC address array.
		*
		* @start: offset of the first line; masked with dcache.entry_mask
		* @extent_per_way: bytes to cover in each way. It is consumed one
		* dcache.linesz at a time and the loop exits only when
		* it reaches exactly 0, so it is expected to be a
		* multiple of the line size.
		*/
		static void __flush_dcache_segment_writethrough(unsigned long start,
		unsigned long extent_per_way)
		{
		unsigned long addr;
		int i;

		/* Address-array base combined (bitwise OR) with the masked entry offset. */
		addr = CACHE_OC_ADDRESS_ARRAY | (start & cpu_data->dcache.entry_mask);

		while (extent_per_way) {
		/* Same line index in every way: ways are way_incr bytes apart. */
		for (i = 0; i < cpu_data->dcache.ways; i++)
		__raw_writel(0, addr + cpu_data->dcache.way_incr * i);

		addr += cpu_data->dcache.linesz;
		extent_per_way -= cpu_data->dcache.linesz;
		}
		}

		/*
		* 1-way variant of the D-cache segment flush. Walks extent_per_way bytes
		* starting at a base address derived from empty_zero_page and issues a
		* movca.l followed by ocbi on each cache line (the technique described in
		* the comment preceding these variants).
		*
		* @start: offset OR-ed into the 64k-aligned base address
		* @extent_per_way: number of bytes to walk. The do/while body always
		* runs at least once and handles four lines per pass.
		*/
		static void __flush_dcache_segment_1way(unsigned long start,
		unsigned long extent_per_way)
		{
		unsigned long orig_sr, sr_with_bl;
		unsigned long base_addr;
		unsigned long way_incr, linesz, way_size;
		struct cache_info *dcache;
		register unsigned long a0, a0e;

		/* Save SR and prepare a copy with bit 28 (the "BL" bit, per the name) set. */
		asm volatile("stc sr, %0" : "=r" (orig_sr));
		sr_with_bl = orig_sr | (1<<28);
		base_addr = ((unsigned long)&empty_zero_page[0]);

		/*
		* The previous code aligned base_addr to 16k, i.e. the way_size of all
		* existing SH-4 D-caches. Whilst I don't see a need to have this
		* aligned to any better than the cache line size (which it will be
		* anyway by construction), let's align it to at least the way_size of
		* any existing or conceivable SH-4 D-cache. -- RPC
		*/
		base_addr = ((base_addr >> 16) << 16);
		base_addr |= start;

		/* way_incr and way_size are loaded here but not used by this variant. */
		dcache = &boot_cpu_data.dcache;
		linesz = dcache->linesz;
		way_incr = dcache->way_incr;
		way_size = dcache->way_size;

		a0 = base_addr;
		a0e = base_addr + extent_per_way;
		do {
		/* SR with bit 28 set is loaded for the batch of four lines... */
		asm volatile("ldc %0, sr" : : "r" (sr_with_bl));
		asm volatile("movca.l r0, @%0\n\t"
		"ocbi @%0" : : "r" (a0));
		a0 += linesz;
		asm volatile("movca.l r0, @%0\n\t"
		"ocbi @%0" : : "r" (a0));
		a0 += linesz;
		asm volatile("movca.l r0, @%0\n\t"
		"ocbi @%0" : : "r" (a0));
		a0 += linesz;
		asm volatile("movca.l r0, @%0\n\t"
		"ocbi @%0" : : "r" (a0));
		/* ...and the original SR is restored before the next batch. */
		asm volatile("ldc %0, sr" : : "r" (orig_sr));
		a0 += linesz;
		} while (a0 < a0e);
		}

		/*
		* 2-way variant of the D-cache segment flush. Same procedure as the 1-way
		* routine, but each step handles the same line offset in two ways at once:
		* a0 walks the first way and a1 = a0 + way_incr walks the second.
		*
		* @start: offset OR-ed into the 64k-aligned base address
		* @extent_per_way: number of bytes to walk in each way. The do/while
		* body always runs at least once and handles four
		* line offsets per pass.
		*/
		static void __flush_dcache_segment_2way(unsigned long start,
		unsigned long extent_per_way)
		{
		unsigned long orig_sr, sr_with_bl;
		unsigned long base_addr;
		unsigned long way_incr, linesz, way_size;
		struct cache_info *dcache;
		register unsigned long a0, a1, a0e;

		/* Save SR and prepare a copy with bit 28 (the "BL" bit, per the name) set. */
		asm volatile("stc sr, %0" : "=r" (orig_sr));
		sr_with_bl = orig_sr | (1<<28);
		base_addr = ((unsigned long)&empty_zero_page[0]);

		/* See comment under 1-way above */
		base_addr = ((base_addr >> 16) << 16);
		base_addr |= start;

		/* way_size is loaded here but not used by this variant. */
		dcache = &boot_cpu_data.dcache;
		linesz = dcache->linesz;
		way_incr = dcache->way_incr;
		way_size = dcache->way_size;

		a0 = base_addr;
		a1 = a0 + way_incr;
		a0e = base_addr + extent_per_way;
		do {
		/* SR with bit 28 set is loaded for the batch of four line offsets... */
		asm volatile("ldc %0, sr" : : "r" (sr_with_bl));
		asm volatile("movca.l r0, @%0\n\t"
		"movca.l r0, @%1\n\t"
		"ocbi @%0\n\t"
		"ocbi @%1" : :
		"r" (a0), "r" (a1));
		a0 += linesz;
		a1 += linesz;
		asm volatile("movca.l r0, @%0\n\t"
		"movca.l r0, @%1\n\t"
		"ocbi @%0\n\t"
		"ocbi @%1" : :
		"r" (a0), "r" (a1));
		a0 += linesz;
		a1 += linesz;
		asm volatile("movca.l r0, @%0\n\t"
		"movca.l r0, @%1\n\t"
		"ocbi @%0\n\t"
		"ocbi @%1" : :
		"r" (a0), "r" (a1));
		a0 += linesz;
		a1 += linesz;
		asm volatile("movca.l r0, @%0\n\t"
		"movca.l r0, @%1\n\t"
		"ocbi @%0\n\t"
		"ocbi @%1" : :
		"r" (a0), "r" (a1));
		/* ...and the original SR is restored before the next batch. */
		asm volatile("ldc %0, sr" : : "r" (orig_sr));
		a0 += linesz;
		a1 += linesz;
		} while (a0 < a0e);
		}

		/*
		* 4-way variant of the D-cache segment flush. Same procedure as the 1-way
		* routine, but each step handles the same line offset in four ways at
		* once: a0..a3 are spaced way_incr bytes apart.
		*
		* @start: offset OR-ed into the 64k-aligned base address
		* @extent_per_way: number of bytes to walk in each way. The do/while
		* body always runs at least once and handles four
		* line offsets per pass.
		*/
		static void __flush_dcache_segment_4way(unsigned long start,
		unsigned long extent_per_way)
		{
		unsigned long orig_sr, sr_with_bl;
		unsigned long base_addr;
		unsigned long way_incr, linesz, way_size;
		struct cache_info *dcache;
		register unsigned long a0, a1, a2, a3, a0e;

		/* Save SR and prepare a copy with bit 28 (the "BL" bit, per the name) set. */
		asm volatile("stc sr, %0" : "=r" (orig_sr));
		sr_with_bl = orig_sr | (1<<28);
		base_addr = ((unsigned long)&empty_zero_page[0]);

		/* See comment under 1-way above */
		base_addr = ((base_addr >> 16) << 16);
		base_addr |= start;

		/* way_size is loaded here but not used by this variant. */
		dcache = &boot_cpu_data.dcache;
		linesz = dcache->linesz;
		way_incr = dcache->way_incr;
		way_size = dcache->way_size;

		a0 = base_addr;
		a1 = a0 + way_incr;
		a2 = a1 + way_incr;
		a3 = a2 + way_incr;
		a0e = base_addr + extent_per_way;
		do {
		/* SR with bit 28 set is loaded for the batch of four line offsets... */
		asm volatile("ldc %0, sr" : : "r" (sr_with_bl));
		asm volatile("movca.l r0, @%0\n\t"
		"movca.l r0, @%1\n\t"
		"movca.l r0, @%2\n\t"
		"movca.l r0, @%3\n\t"
		"ocbi @%0\n\t"
		"ocbi @%1\n\t"
		"ocbi @%2\n\t"
		"ocbi @%3\n\t" : :
		"r" (a0), "r" (a1), "r" (a2), "r" (a3));
		a0 += linesz;
		a1 += linesz;
		a2 += linesz;
		a3 += linesz;
		asm volatile("movca.l r0, @%0\n\t"
		"movca.l r0, @%1\n\t"
		"movca.l r0, @%2\n\t"
		"movca.l r0, @%3\n\t"
		"ocbi @%0\n\t"
		"ocbi @%1\n\t"
		"ocbi @%2\n\t"
		"ocbi @%3\n\t" : :
		"r" (a0), "r" (a1), "r" (a2), "r" (a3));
		a0 += linesz;
		a1 += linesz;
		a2 += linesz;
		a3 += linesz;
		asm volatile("movca.l r0, @%0\n\t"
		"movca.l r0, @%1\n\t"
		"movca.l r0, @%2\n\t"
		"movca.l r0, @%3\n\t"
		"ocbi @%0\n\t"
		"ocbi @%1\n\t"
		"ocbi @%2\n\t"
		"ocbi @%3\n\t" : :
		"r" (a0), "r" (a1), "r" (a2), "r" (a3));
		a0 += linesz;
		a1 += linesz;
		a2 += linesz;
		a3 += linesz;
		asm volatile("movca.l r0, @%0\n\t"
		"movca.l r0, @%1\n\t"
		"movca.l r0, @%2\n\t"
		"movca.l r0, @%3\n\t"
		"ocbi @%0\n\t"
		"ocbi @%1\n\t"
		"ocbi @%2\n\t"
		"ocbi @%3\n\t" : :
		"r" (a0), "r" (a1), "r" (a2), "r" (a3));
		/* ...and the original SR is restored before the next batch. */
		asm volatile("ldc %0, sr" : : "r" (orig_sr));
		a0 += linesz;
		a1 += linesz;
		a2 += linesz;
		a3 += linesz;
		} while (a0 < a0e);
		}

		extern void __weak sh4__flush_region_init(void);

		/*
		@@ -709,32 +384,11 @@ extern void __weak sh4__flush_region_init(void);
		*/
		void __init sh4_cache_init(void)
		{
		unsigned int wt_enabled = !!(__raw_readl(CCR) & CCR_CACHE_WT);

		printk("PVR=%08x CVR=%08x PRR=%08x\n",
		ctrl_inl(CCN_PVR),
		ctrl_inl(CCN_CVR),
		ctrl_inl(CCN_PRR));

		if (wt_enabled)
		__flush_dcache_segment_fn = __flush_dcache_segment_writethrough;
		else {
		switch (boot_cpu_data.dcache.ways) {
		case 1:
		__flush_dcache_segment_fn = __flush_dcache_segment_1way;
		break;
		case 2:
		__flush_dcache_segment_fn = __flush_dcache_segment_2way;
		break;
		case 4:
		__flush_dcache_segment_fn = __flush_dcache_segment_4way;
		break;
		default:
		panic("unknown number of cache ways\n");
		break;
		}
		}

		local_flush_icache_range = sh4_flush_icache_range;
		local_flush_dcache_page = sh4_flush_dcache_page;
		local_flush_cache_all = sh4_flush_cache_all;

arch/sh/mm/cache.c

+6 −0

Original line number	Diff line number	Diff line
		@@ -164,11 +164,17 @@ void flush_cache_all(void)

		/*
		* Run the local_flush_cache_mm handler for @mm through
		* cacheop_on_each_cpu(). Does nothing when the boot CPU's D-cache reports
		* zero aliases.
		*/
		void flush_cache_mm(struct mm_struct *mm)
		{
		/* No D-cache aliases: skip the cross-CPU call entirely. */
		if (boot_cpu_data.dcache.n_aliases == 0)
		return;

		/* NOTE(review): the trailing 1 is presumably a "wait" flag -- confirm. */
		cacheop_on_each_cpu(local_flush_cache_mm, mm, 1);
		}

		/*
		* Run the local_flush_cache_dup_mm handler for @mm through
		* cacheop_on_each_cpu(). Does nothing when the boot CPU's D-cache reports
		* zero aliases.
		*/
		void flush_cache_dup_mm(struct mm_struct *mm)
		{
		/* No D-cache aliases: skip the cross-CPU call entirely. */
		if (boot_cpu_data.dcache.n_aliases == 0)
		return;

		/* NOTE(review): the trailing 1 is presumably a "wait" flag -- confirm. */
		cacheop_on_each_cpu(local_flush_cache_dup_mm, mm, 1);
		}

arch/sh/mm/kmap.c

+3 −1

Original line number	Diff line number	Diff line
		@@ -39,7 +39,9 @@ void *kmap_coherent(struct page *page, unsigned long addr)
		pagefault_disable();

		idx = FIX_CMAP_END -
		((addr & current_cpu_data.dcache.alias_mask) >> PAGE_SHIFT);
		(((addr >> PAGE_SHIFT) & (FIX_N_COLOURS - 1)) +
		(FIX_N_COLOURS * smp_processor_id()));

		vaddr = __fix_to_virt(idx);

		BUG_ON(!pte_none(*(kmap_coherent_pte - idx)));