
Commit f8f98a93 authored by Russell King

[PATCH] ARM: Fix Xscale copy_page implementation



The ARM copypage changes in 2.6.12-rc4-git1 removed the preempt locking
from the copypage functions, which broke the XScale implementation.
This patch fixes the locking on XScale and removes the now-unneeded
minicache code.

Signed-off-by: Russell King <rmk@arm.linux.org.uk>
Checked-by: Richard Purdie
parent 17d82fcc
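
The race being fixed: the mini-cache copy routines map the source page at a
single fixed kernel virtual address (COPYPAGE_MINICACHE, 0xffff8000), so once
the preempt locking was removed, one task could be preempted mid-copy by
another that remaps the window, leaving the first task reading the wrong
page. The fix moves the entry points from assembly into C and serialises all
users of the window with a spinlock. A condensed sketch of that pattern,
using the names from copypage-xscale.c in the diff below:

	static DEFINE_SPINLOCK(minicache_lock);

	void xscale_mc_copy_user_page(void *kto, const void *kfrom,
				      unsigned long vaddr)
	{
		/*
		 * Hold the lock across both the temporary mapping and the
		 * copy itself: the window is one shared virtual address,
		 * so a concurrent user would otherwise remap it under us.
		 */
		spin_lock(&minicache_lock);

		/* Point the fixed window at the source page... */
		set_pte(TOP_PTE(COPYPAGE_MINICACHE),
			pfn_pte(__pa(kfrom) >> PAGE_SHIFT, minicache_pgprot));
		/* ...and drop any stale TLB entry for that address. */
		flush_tlb_kernel_page(COPYPAGE_MINICACHE);

		mc_copy_user_page((void *)COPYPAGE_MINICACHE, kto);

		spin_unlock(&minicache_lock);
	}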
arch/arm/mm/Kconfig
+0 −7
@@ -228,7 +228,6 @@ config CPU_SA1100
 	select CPU_CACHE_V4WB
 	select CPU_CACHE_VIVT
 	select CPU_TLB_V4WB
-	select CPU_MINICACHE
 
 # XScale
 config CPU_XSCALE
@@ -239,7 +238,6 @@ config CPU_XSCALE
 	select CPU_ABRT_EV5T
 	select CPU_CACHE_VIVT
 	select CPU_TLB_V4WBI
-	select CPU_MINICACHE
 
 # ARMv6
 config CPU_V6
@@ -345,11 +343,6 @@ config CPU_TLB_V4WBI
 config CPU_TLB_V6
 	bool
 
-config CPU_MINICACHE
-	bool
-	help
-	  Processor has a minicache.
-
 comment "Processor Features"
 
 config ARM_THUMB
arch/arm/mm/Makefile
+0 −2
@@ -31,8 +31,6 @@ obj-$(CONFIG_CPU_COPY_V6) += copypage-v6.o mmu.o
 obj-$(CONFIG_CPU_SA1100)	+= copypage-v4mc.o
 obj-$(CONFIG_CPU_XSCALE)	+= copypage-xscale.o
 
-obj-$(CONFIG_CPU_MINICACHE)	+= minicache.o
-
 obj-$(CONFIG_CPU_TLB_V3)	+= tlb-v3.o
 obj-$(CONFIG_CPU_TLB_V4WT)	+= tlb-v4.o
 obj-$(CONFIG_CPU_TLB_V4WB)	+= tlb-v4wb.o

arch/arm/mm/copypage-xscale.S
deleted 100644 → 0
+0 −113
/*
 *  linux/arch/arm/lib/copypage-xscale.S
 *
 *  Copyright (C) 2001 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/linkage.h>
#include <linux/init.h>
#include <asm/constants.h>

/*
 * General note:
 *  We don't really want write-allocate cache behaviour for these functions
 *  since that will just eat through 8K of the cache.
 */

	.text
	.align	5
/*
 * XScale optimised copy_user_page
 *  r0 = destination
 *  r1 = source
 *  r2 = virtual user address of ultimate destination page
 *
 * The source page may have some clean entries in the cache already, but we
 * can safely ignore them - break_cow() will flush them out of the cache
 * if we eventually end up using our copied page.
 *
 * What we could do is use the mini-cache to buffer reads from the source
 * page.  We rely on the mini-cache being smaller than one page, so we'll
 * cycle through the complete cache anyway.
 */
ENTRY(xscale_mc_copy_user_page)
	stmfd	sp!, {r4, r5, lr}
	mov	r5, r0
	mov	r0, r1
	bl	map_page_minicache
	mov	r1, r5
	mov	lr, #PAGE_SZ/64-1

	/*
	 * Strangely enough, best performance is achieved
	 * when prefetching destination as well.  (NP)
	 */
	pld	[r0, #0]
	pld	[r0, #32]
	pld	[r1, #0]
	pld	[r1, #32]

1:	pld	[r0, #64]
	pld	[r0, #96]
	pld	[r1, #64]
	pld	[r1, #96]

2:	ldrd	r2, [r0], #8
	ldrd	r4, [r0], #8
	mov	ip, r1
	strd	r2, [r1], #8
	ldrd	r2, [r0], #8
	strd	r4, [r1], #8
	ldrd	r4, [r0], #8
	strd	r2, [r1], #8
	strd	r4, [r1], #8
	mcr	p15, 0, ip, c7, c10, 1		@ clean D line
	ldrd	r2, [r0], #8
	mcr	p15, 0, ip, c7, c6, 1		@ invalidate D line
	ldrd	r4, [r0], #8
	mov	ip, r1
	strd	r2, [r1], #8
	ldrd	r2, [r0], #8
	strd	r4, [r1], #8
	ldrd	r4, [r0], #8
	strd	r2, [r1], #8
	strd	r4, [r1], #8
	mcr	p15, 0, ip, c7, c10, 1		@ clean D line
	subs	lr, lr, #1
	mcr	p15, 0, ip, c7, c6, 1		@ invalidate D line
	bgt	1b
	beq	2b

	ldmfd	sp!, {r4, r5, pc}

	.align	5
/*
 * XScale optimised clear_user_page
 *  r0 = destination
 *  r1 = virtual user address of ultimate destination page
 */
ENTRY(xscale_mc_clear_user_page)
	mov	r1, #PAGE_SZ/32
	mov	r2, #0
	mov	r3, #0
1:	mov	ip, r0
	strd	r2, [r0], #8
	strd	r2, [r0], #8
	strd	r2, [r0], #8
	strd	r2, [r0], #8
	mcr	p15, 0, ip, c7, c10, 1		@ clean D line
	subs	r1, r1, #1
	mcr	p15, 0, ip, c7, c6, 1		@ invalidate D line
	bne	1b
	mov	pc, lr

	__INITDATA

	.type	xscale_mc_user_fns, #object
ENTRY(xscale_mc_user_fns)
	.long	xscale_mc_clear_user_page
	.long	xscale_mc_copy_user_page
	.size	xscale_mc_user_fns, . - xscale_mc_user_fns
arch/arm/mm/copypage-xscale.c
+131 −0
/*
 *  linux/arch/arm/lib/copypage-xscale.S
 *
 *  Copyright (C) 1995-2005 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This handles the mini data cache, as found on SA11x0 and XScale
 * processors.  When we copy a user page, we map it in such a way
 * that accesses to this page will not touch the main data cache, but
 * will be cached in the mini data cache.  This prevents us thrashing
 * the main data cache on page faults.
 */
#include <linux/init.h>
#include <linux/mm.h>

#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>

/*
 * 0xffff8000 to 0xffffffff is reserved for any ARM architecture
 * specific hacks for copying pages efficiently.
 */
#define COPYPAGE_MINICACHE	0xffff8000

#define minicache_pgprot __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | \
				  L_PTE_CACHEABLE)

#define TOP_PTE(x)	pte_offset_kernel(top_pmd, x)

static DEFINE_SPINLOCK(minicache_lock);

/*
 * XScale mini-dcache optimised copy_user_page
 *
 * We flush the destination cache lines just before we write the data into the
 * corresponding address.  Since the Dcache is read-allocate, this removes the
 * Dcache aliasing issue.  The writes will be forwarded to the write buffer,
 * and merged as appropriate.
 */
static void __attribute__((naked))
mc_copy_user_page(void *from, void *to)
{
	/*
	 * Strangely enough, best performance is achieved
	 * when prefetching destination as well.  (NP)
	 */
	asm volatile(
	"stmfd	sp!, {r4, r5, lr}		\n\
	mov	lr, %2				\n\
	pld	[r0, #0]			\n\
	pld	[r0, #32]			\n\
	pld	[r1, #0]			\n\
	pld	[r1, #32]			\n\
1:	pld	[r0, #64]			\n\
	pld	[r0, #96]			\n\
	pld	[r1, #64]			\n\
	pld	[r1, #96]			\n\
2:	ldrd	r2, [r0], #8			\n\
	ldrd	r4, [r0], #8			\n\
	mov	ip, r1				\n\
	strd	r2, [r1], #8			\n\
	ldrd	r2, [r0], #8			\n\
	strd	r4, [r1], #8			\n\
	ldrd	r4, [r0], #8			\n\
	strd	r2, [r1], #8			\n\
	strd	r4, [r1], #8			\n\
	mcr	p15, 0, ip, c7, c10, 1		@ clean D line\n\
	ldrd	r2, [r0], #8			\n\
	mcr	p15, 0, ip, c7, c6, 1		@ invalidate D line\n\
	ldrd	r4, [r0], #8			\n\
	mov	ip, r1				\n\
	strd	r2, [r1], #8			\n\
	ldrd	r2, [r0], #8			\n\
	strd	r4, [r1], #8			\n\
	ldrd	r4, [r0], #8			\n\
	strd	r2, [r1], #8			\n\
	strd	r4, [r1], #8			\n\
	mcr	p15, 0, ip, c7, c10, 1		@ clean D line\n\
	subs	lr, lr, #1			\n\
	mcr	p15, 0, ip, c7, c6, 1		@ invalidate D line\n\
	bgt	1b				\n\
	beq	2b				\n\
	ldmfd	sp!, {r4, r5, pc}		"
	:
	: "r" (from), "r" (to), "I" (PAGE_SIZE / 64 - 1));
}

void xscale_mc_copy_user_page(void *kto, const void *kfrom, unsigned long vaddr)
{
	spin_lock(&minicache_lock);

	set_pte(TOP_PTE(COPYPAGE_MINICACHE), pfn_pte(__pa(kfrom) >> PAGE_SHIFT, minicache_pgprot));
	flush_tlb_kernel_page(COPYPAGE_MINICACHE);

	mc_copy_user_page((void *)COPYPAGE_MINICACHE, kto);

	spin_unlock(&minicache_lock);
}

/*
 * XScale optimised clear_user_page
 */
void __attribute__((naked))
xscale_mc_clear_user_page(void *kaddr, unsigned long vaddr)
{
	asm volatile(
	"mov	r1, %0				\n\
	mov	r2, #0				\n\
	mov	r3, #0				\n\
1:	mov	ip, r0				\n\
	strd	r2, [r0], #8			\n\
	strd	r2, [r0], #8			\n\
	strd	r2, [r0], #8			\n\
	strd	r2, [r0], #8			\n\
	mcr	p15, 0, ip, c7, c10, 1		@ clean D line\n\
	subs	r1, r1, #1			\n\
	mcr	p15, 0, ip, c7, c6, 1		@ invalidate D line\n\
	bne	1b				\n\
	mov	pc, lr"
	:
	: "I" (PAGE_SIZE / 32));
}

struct cpu_user_fns xscale_mc_user_fns __initdata = {
	.cpu_clear_user_page	= xscale_mc_clear_user_page, 
	.cpu_copy_user_page	= xscale_mc_copy_user_page,
};
arch/arm/mm/minicache.c
deleted 100644 → 0
+0 −73
/*
 *  linux/arch/arm/mm/minicache.c
 *
 *  Copyright (C) 2001 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This handles the mini data cache, as found on SA11x0 and XScale
 * processors.  When we copy a user page, we map it in such a way
 * that accesses to this page will not touch the main data cache, but
 * will be cached in the mini data cache.  This prevents us thrashing
 * the main data cache on page faults.
 */
#include <linux/init.h>
#include <linux/mm.h>

#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>

/*
 * 0xffff8000 to 0xffffffff is reserved for any ARM architecture
 * specific hacks for copying pages efficiently.
 */
#define minicache_address (0xffff8000)
#define minicache_pgprot __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | \
				  L_PTE_CACHEABLE)

static pte_t *minicache_pte;

/*
 * Note that this is intended to be called only from the copy_user_page
 * asm code; anything else will require special locking to prevent the
 * mini-cache space being re-used.  (Note: probably preempt unsafe).
 *
 * We rely on the fact that the minicache is 2K, and we'll be pushing
 * 4K of data through it, so we don't actually have to specifically
 * flush the minicache when we change the mapping.
 *
 * Note also: assert(PAGE_OFFSET <= virt < high_memory).
 * Unsafe: preempt, kmap.
 */
unsigned long map_page_minicache(unsigned long virt)
{
	set_pte(minicache_pte, pfn_pte(__pa(virt) >> PAGE_SHIFT, minicache_pgprot));
	flush_tlb_kernel_page(minicache_address);

	return minicache_address;
}

static int __init minicache_init(void)
{
	pgd_t *pgd;
	pmd_t *pmd;

	spin_lock(&init_mm.page_table_lock);

	pgd = pgd_offset_k(minicache_address);
	pmd = pmd_alloc(&init_mm, pgd, minicache_address);
	if (!pmd)
		BUG();
	minicache_pte = pte_alloc_kernel(&init_mm, pmd, minicache_address);
	if (!minicache_pte)
		BUG();

	spin_unlock(&init_mm.page_table_lock);

	return 0;
}

core_initcall(minicache_init);