
Commit f8f98a93 authored by Russell King

[PATCH] ARM: Fix Xscale copy_page implementation



The ARM copypage changes in 2.6.12-rc4-git1 removed the preempt locking
from the copypage functions, which broke the XScale implementation.
This patch fixes the locking on XScale and removes the now-unneeded
minicache code.

Signed-off-by: Russell King <rmk@arm.linux.org.uk>
Checked-by: Richard Purdie
parent 17d82fcc
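
The race being fixed: the mini-cache copy routines map the source page at a
single fixed kernel virtual address (COPYPAGE_MINICACHE, 0xffff8000), so once
the preempt locking was removed, one task could be preempted mid-copy by
another that remaps the window, leaving the first task reading the wrong
page. The fix moves the entry points from assembly into C and serialises all
users of the window with a spinlock. A condensed sketch of that pattern,
using the names from copypage-xscale.c in the diff below:

	static DEFINE_SPINLOCK(minicache_lock);

	void xscale_mc_copy_user_page(void *kto, const void *kfrom,
				      unsigned long vaddr)
	{
		/*
		 * Hold the lock across both the temporary mapping and the
		 * copy itself: the window is one shared virtual address,
		 * so a concurrent user would otherwise remap it under us.
		 */
		spin_lock(&minicache_lock);

		/* Point the fixed window at the source page... */
		set_pte(TOP_PTE(COPYPAGE_MINICACHE),
			pfn_pte(__pa(kfrom) >> PAGE_SHIFT, minicache_pgprot));
		/* ...and drop any stale TLB entry for that address. */
		flush_tlb_kernel_page(COPYPAGE_MINICACHE);

		mc_copy_user_page((void *)COPYPAGE_MINICACHE, kto);

		spin_unlock(&minicache_lock);
	}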
arch/arm/mm/Kconfig
+0 −7
@@ -228,7 +228,6 @@ config CPU_SA1100
 	select CPU_CACHE_V4WB
 	select CPU_CACHE_VIVT
 	select CPU_TLB_V4WB
-	select CPU_MINICACHE
 
 # XScale
 config CPU_XSCALE
@@ -239,7 +238,6 @@ config CPU_XSCALE
 	select CPU_ABRT_EV5T
 	select CPU_CACHE_VIVT
 	select CPU_TLB_V4WBI
-	select CPU_MINICACHE
 
 # ARMv6
 config CPU_V6
@@ -345,11 +343,6 @@ config CPU_TLB_V4WBI
 config CPU_TLB_V6
 	bool
 
-config CPU_MINICACHE
-	bool
-	help
-	  Processor has a minicache.
-
 comment "Processor Features"
 
 config ARM_THUMB
arch/arm/mm/Makefile
+0 −2
@@ -31,8 +31,6 @@ obj-$(CONFIG_CPU_COPY_V6) += copypage-v6.o mmu.o
 obj-$(CONFIG_CPU_SA1100)	+= copypage-v4mc.o
 obj-$(CONFIG_CPU_XSCALE)	+= copypage-xscale.o
 
-obj-$(CONFIG_CPU_MINICACHE)	+= minicache.o
-
 obj-$(CONFIG_CPU_TLB_V3)	+= tlb-v3.o
 obj-$(CONFIG_CPU_TLB_V4WT)	+= tlb-v4.o
 obj-$(CONFIG_CPU_TLB_V4WB)	+= tlb-v4wb.o

arch/arm/mm/copypage-xscale.S
deleted 100644 → 0
+0 −113
/*
 *  linux/arch/arm/lib/copypage-xscale.S
 *
 *  Copyright (C) 2001 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/linkage.h>
#include <linux/init.h>
#include <asm/constants.h>

/*
 * General note:
 *  We don't really want write-allocate cache behaviour for these functions
 *  since that will just eat through 8K of the cache.
 */

	.text
	.align	5
/*
 * XScale optimised copy_user_page
 *  r0 = destination
 *  r1 = source
 *  r2 = virtual user address of ultimate destination page
 *
 * The source page may have some clean entries in the cache already, but we
 * can safely ignore them - break_cow() will flush them out of the cache
 * if we eventually end up using our copied page.
 *
 * What we could do is use the mini-cache to buffer reads from the source
 * page.  We rely on the mini-cache being smaller than one page, so we'll
 * cycle through the complete cache anyway.
 */
ENTRY(xscale_mc_copy_user_page)
	stmfd	sp!, {r4, r5, lr}
	mov	r5, r0
	mov	r0, r1
	bl	map_page_minicache
	mov	r1, r5
	mov	lr, #PAGE_SZ/64-1

	/*
	 * Strangely enough, best performance is achieved
	 * when prefetching destination as well.  (NP)
	 */
	pld	[r0, #0]
	pld	[r0, #32]
	pld	[r1, #0]
	pld	[r1, #32]

1:	pld	[r0, #64]
	pld	[r0, #96]
	pld	[r1, #64]
	pld	[r1, #96]

2:	ldrd	r2, [r0], #8
	ldrd	r4, [r0], #8
	mov	ip, r1
	strd	r2, [r1], #8
	ldrd	r2, [r0], #8
	strd	r4, [r1], #8
	ldrd	r4, [r0], #8
	strd	r2, [r1], #8
	strd	r4, [r1], #8
	mcr	p15, 0, ip, c7, c10, 1		@ clean D line
	ldrd	r2, [r0], #8
	mcr	p15, 0, ip, c7, c6, 1		@ invalidate D line
	ldrd	r4, [r0], #8
	mov	ip, r1
	strd	r2, [r1], #8
	ldrd	r2, [r0], #8
	strd	r4, [r1], #8
	ldrd	r4, [r0], #8
	strd	r2, [r1], #8
	strd	r4, [r1], #8
	mcr	p15, 0, ip, c7, c10, 1		@ clean D line
	subs	lr, lr, #1
	mcr	p15, 0, ip, c7, c6, 1		@ invalidate D line
	bgt	1b
	beq	2b

	ldmfd	sp!, {r4, r5, pc}

	.align	5
/*
 * XScale optimised clear_user_page
 *  r0 = destination
 *  r1 = virtual user address of ultimate destination page
 */
ENTRY(xscale_mc_clear_user_page)
	mov	r1, #PAGE_SZ/32
	mov	r2, #0
	mov	r3, #0
1:	mov	ip, r0
	strd	r2, [r0], #8
	strd	r2, [r0], #8
	strd	r2, [r0], #8
	strd	r2, [r0], #8
	mcr	p15, 0, ip, c7, c10, 1		@ clean D line
	subs	r1, r1, #1
	mcr	p15, 0, ip, c7, c6, 1		@ invalidate D line
	bne	1b
	mov	pc, lr

	__INITDATA

	.type	xscale_mc_user_fns, #object
ENTRY(xscale_mc_user_fns)
	.long	xscale_mc_clear_user_page
	.long	xscale_mc_copy_user_page
	.size	xscale_mc_user_fns, . - xscale_mc_user_fns
arch/arm/mm/copypage-xscale.c
+131 −0
/*
 *  linux/arch/arm/lib/copypage-xscale.S
 *
 *  Copyright (C) 1995-2005 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This handles the mini data cache, as found on SA11x0 and XScale
 * processors.  When we copy a user page, we map it in such a way
 * that accesses to this page will not touch the main data cache, but
 * will be cached in the mini data cache.  This prevents us thrashing
 * the main data cache on page faults.
 */
#include <linux/init.h>
#include <linux/mm.h>

#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>

/*
 * 0xffff8000 to 0xffffffff is reserved for any ARM architecture
 * specific hacks for copying pages efficiently.
 */
#define COPYPAGE_MINICACHE	0xffff8000

#define minicache_pgprot __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | \
				  L_PTE_CACHEABLE)

#define TOP_PTE(x)	pte_offset_kernel(top_pmd, x)

static DEFINE_SPINLOCK(minicache_lock);

/*
 * XScale mini-dcache optimised copy_user_page
 *
 * We flush the destination cache lines just before we write the data into the
 * corresponding address.  Since the Dcache is read-allocate, this removes the
 * Dcache aliasing issue.  The writes will be forwarded to the write buffer,
 * and merged as appropriate.
 */
static void __attribute__((naked))
mc_copy_user_page(void *from, void *to)
{
	/*
	 * Strangely enough, best performance is achieved
	 * when prefetching destination as well.  (NP)
	 */
	asm volatile(
	"stmfd	sp!, {r4, r5, lr}		\n\
	mov	lr, %2				\n\
	pld	[r0, #0]			\n\
	pld	[r0, #32]			\n\
	pld	[r1, #0]			\n\
	pld	[r1, #32]			\n\
1:	pld	[r0, #64]			\n\
	pld	[r0, #96]			\n\
	pld	[r1, #64]			\n\
	pld	[r1, #96]			\n\
2:	ldrd	r2, [r0], #8			\n\
	ldrd	r4, [r0], #8			\n\
	mov	ip, r1				\n\
	strd	r2, [r1], #8			\n\
	ldrd	r2, [r0], #8			\n\
	strd	r4, [r1], #8			\n\
	ldrd	r4, [r0], #8			\n\
	strd	r2, [r1], #8			\n\
	strd	r4, [r1], #8			\n\
	mcr	p15, 0, ip, c7, c10, 1		@ clean D line\n\
	ldrd	r2, [r0], #8			\n\
	mcr	p15, 0, ip, c7, c6, 1		@ invalidate D line\n\
	ldrd	r4, [r0], #8			\n\
	mov	ip, r1				\n\
	strd	r2, [r1], #8			\n\
	ldrd	r2, [r0], #8			\n\
	strd	r4, [r1], #8			\n\
	ldrd	r4, [r0], #8			\n\
	strd	r2, [r1], #8			\n\
	strd	r4, [r1], #8			\n\
	mcr	p15, 0, ip, c7, c10, 1		@ clean D line\n\
	subs	lr, lr, #1			\n\
	mcr	p15, 0, ip, c7, c6, 1		@ invalidate D line\n\
	bgt	1b				\n\
	beq	2b				\n\
	ldmfd	sp!, {r4, r5, pc}		"
	:
	: "r" (from), "r" (to), "I" (PAGE_SIZE / 64 - 1));
}

void xscale_mc_copy_user_page(void *kto, const void *kfrom, unsigned long vaddr)
{
	spin_lock(&minicache_lock);

	set_pte(TOP_PTE(COPYPAGE_MINICACHE), pfn_pte(__pa(kfrom) >> PAGE_SHIFT, minicache_pgprot));
	flush_tlb_kernel_page(COPYPAGE_MINICACHE);

	mc_copy_user_page((void *)COPYPAGE_MINICACHE, kto);

	spin_unlock(&minicache_lock);
}

/*
 * XScale optimised clear_user_page
 */
void __attribute__((naked))
xscale_mc_clear_user_page(void *kaddr, unsigned long vaddr)
{
	asm volatile(
	"mov	r1, %0				\n\
	mov	r2, #0				\n\
	mov	r3, #0				\n\
1:	mov	ip, r0				\n\
	strd	r2, [r0], #8			\n\
	strd	r2, [r0], #8			\n\
	strd	r2, [r0], #8			\n\
	strd	r2, [r0], #8			\n\
	mcr	p15, 0, ip, c7, c10, 1		@ clean D line\n\
	subs	r1, r1, #1			\n\
	mcr	p15, 0, ip, c7, c6, 1		@ invalidate D line\n\
	bne	1b				\n\
	mov	pc, lr"
	:
	: "I" (PAGE_SIZE / 32));
}

struct cpu_user_fns xscale_mc_user_fns __initdata = {
	.cpu_clear_user_page	= xscale_mc_clear_user_page, 
	.cpu_copy_user_page	= xscale_mc_copy_user_page,
};
arch/arm/mm/minicache.c
deleted 100644 → 0
+0 −73
/*
 *  linux/arch/arm/mm/minicache.c
 *
 *  Copyright (C) 2001 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This handles the mini data cache, as found on SA11x0 and XScale
 * processors.  When we copy a user page, we map it in such a way
 * that accesses to this page will not touch the main data cache, but
 * will be cached in the mini data cache.  This prevents us thrashing
 * the main data cache on page faults.
 */
#include <linux/init.h>
#include <linux/mm.h>

#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>

/*
 * 0xffff8000 to 0xffffffff is reserved for any ARM architecture
 * specific hacks for copying pages efficiently.
 */
#define minicache_address (0xffff8000)
#define minicache_pgprot __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | \
				  L_PTE_CACHEABLE)

static pte_t *minicache_pte;

/*
 * Note that this is intended to be called only from the copy_user_page
 * asm code; anything else will require special locking to prevent the
 * mini-cache space being re-used.  (Note: probably preempt unsafe).
 *
 * We rely on the fact that the minicache is 2K, and we'll be pushing
 * 4K of data through it, so we don't actually have to specifically
 * flush the minicache when we change the mapping.
 *
 * Note also: assert(PAGE_OFFSET <= virt < high_memory).
 * Unsafe: preempt, kmap.
 */
unsigned long map_page_minicache(unsigned long virt)
{
	set_pte(minicache_pte, pfn_pte(__pa(virt) >> PAGE_SHIFT, minicache_pgprot));
	flush_tlb_kernel_page(minicache_address);

	return minicache_address;
}

static int __init minicache_init(void)
{
	pgd_t *pgd;
	pmd_t *pmd;

	spin_lock(&init_mm.page_table_lock);

	pgd = pgd_offset_k(minicache_address);
	pmd = pmd_alloc(&init_mm, pgd, minicache_address);
	if (!pmd)
		BUG();
	minicache_pte = pte_alloc_kernel(&init_mm, pmd, minicache_address);
	if (!minicache_pte)
		BUG();

	spin_unlock(&init_mm.page_table_lock);

	return 0;
}

core_initcall(minicache_init);