Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 5e9377ec authored by Stuart Menefy, committed by Paul Mundt
Browse files

sh: Optimise memcpy_to/fromio for SH4



Optimise memcpy_to/fromio. This is used extensively by MTD, so is a
worthwhile performance gain. The main savings come from not repeatedly
calling readl/writel, and doing word-at-a-time instead of byte-at-a-time
transfers. Also, using "movca.l" on SH4 gives a small performance win.

Signed-off-by: Stuart Menefy <stuart.menefy@st.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
parent 8af57f8b
Loading
Loading
Loading
Loading
+72 −21
Original line number Original line Diff line number Diff line
/*
/*
 * linux/arch/sh/kernel/io.c
 * arch/sh/kernel/io.c - Machine independent I/O functions.
 *
 *
 * Copyright (C) 2000  Stuart Menefy
 * Copyright (C) 2000 - 2009  Stuart Menefy
 * Copyright (C) 2005  Paul Mundt
 * Copyright (C) 2005  Paul Mundt
 *
 *
 * Provide real functions which expand to whatever the header file defined.
 * Also definitions of machine independent IO functions.
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 * for more details.
@@ -18,33 +15,87 @@


/*
/*
 * Copy data from IO memory space to "real" memory space.
 * Copy data from IO memory space to "real" memory space.
 * This needs to be optimized.
 */
 */
void memcpy_fromio(void *to, const volatile void __iomem *from, unsigned long count)
void memcpy_fromio(void *to, const volatile void __iomem *from, unsigned long count)
{
{
	unsigned char *p = to;
	/*
        while (count) {
	 * Would it be worthwhile doing byte and long transfers first
                count--;
	 * to try and get aligned?
                *p = readb(from);
	 */
                p++;
#ifdef CONFIG_CPU_SH4
	if ((count >= 0x20) &&
	     (((u32)to & 0x1f) == 0) && (((u32)from & 0x3) == 0)) {
		int tmp2, tmp3, tmp4, tmp5, tmp6;

		__asm__ __volatile__(
			"1:			\n\t"
			"mov.l	@%7+, r0	\n\t"
			"mov.l	@%7+, %2	\n\t"
			"movca.l r0, @%0	\n\t"
			"mov.l	@%7+, %3	\n\t"
			"mov.l	@%7+, %4	\n\t"
			"mov.l	@%7+, %5	\n\t"
			"mov.l	@%7+, %6	\n\t"
			"mov.l	@%7+, r7	\n\t"
			"mov.l	@%7+, r0	\n\t"
			"mov.l	%2, @(0x04,%0)	\n\t"
			"mov	#0x20, %2	\n\t"
			"mov.l	%3, @(0x08,%0)	\n\t"
			"sub	%2, %1		\n\t"
			"mov.l	%4, @(0x0c,%0)	\n\t"
			"cmp/hi	%1, %2		! T if 32 > count	\n\t"
			"mov.l	%5, @(0x10,%0)	\n\t"
			"mov.l	%6, @(0x14,%0)	\n\t"
			"mov.l	r7, @(0x18,%0)	\n\t"
			"mov.l	r0, @(0x1c,%0)	\n\t"
			"bf.s	1b		\n\t"
			" add	#0x20, %0	\n\t"
			: "=&r" (to), "=&r" (count),
			  "=&r" (tmp2), "=&r" (tmp3), "=&r" (tmp4),
			  "=&r" (tmp5), "=&r" (tmp6), "=&r" (from)
			: "7"(from), "0" (to), "1" (count)
			: "r0", "r7", "t", "memory");
	}
#endif

	if ((((u32)to | (u32)from) & 0x3) == 0) {
		for (; count > 3; count -= 4) {
			*(u32 *)to = *(volatile u32 *)from;
			to += 4;
			from += 4;
		}
	}

	for (; count > 0; count--) {
		*(u8 *)to = *(volatile u8 *)from;
		to++;
		from++;
		from++;
	}
	}

	mb();
}
}
EXPORT_SYMBOL(memcpy_fromio);
EXPORT_SYMBOL(memcpy_fromio);


/*
/*
 * Copy data from "real" memory space to IO memory space.
 * Copy data from "real" memory space to IO memory space.
 * This needs to be optimized.
 */
 */
void memcpy_toio(volatile void __iomem *to, const void *from, unsigned long count)
void memcpy_toio(volatile void __iomem *to, const void *from, unsigned long count)
{
{
	const unsigned char *p = from;
	if ((((u32)to | (u32)from) & 0x3) == 0) {
        while (count) {
		for ( ; count > 3; count -= 4) {
                count--;
			*(volatile u32 *)to = *(u32 *)from;
                writeb(*p, to);
			to += 4;
                p++;
			from += 4;
		}
	}

	for (; count > 0; count--) {
		*(volatile u8 *)to = *(u8 *)from;
		to++;
		to++;
		from++;
	}
	}

	mb();
}
}
EXPORT_SYMBOL(memcpy_toio);
EXPORT_SYMBOL(memcpy_toio);