Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 5636919b authored by Ralf Baechle's avatar Ralf Baechle
Browse files

MIPS: Outline udelay and fix a few issues.



Outlining fixes the issue were on certain CPUs such as the R10000 family
the delay loop would need an extra cycle if it overlaps a cacheline
boundary.

The rewrite also fixes build errors with GCC 4.4 which was changed in
way incompatible with the kernel's inline assembly.

Relying on pure C for computation of the delay value removes the need for
explicit.  The price we pay is a slight slowdown of the computation - to
be fixed on another day.

Signed-off-by: default avatarRalf Baechle <ralf@linux-mips.org>
parent 3a553147
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -39,8 +39,8 @@ struct cache_desc {
#define MIPS_CACHE_PINDEX	0x00000020	/* Physically indexed cache */

struct cpuinfo_mips {
	unsigned long		udelay_val;
	unsigned long		asid_cache;
	unsigned int		udelay_val;
	unsigned int		asid_cache;

	/*
	 * Capability and feature descriptor structure for MIPS CPU
+5 −87
Original line number Diff line number Diff line
@@ -11,94 +11,12 @@
#ifndef _ASM_DELAY_H
#define _ASM_DELAY_H

#include <linux/param.h>
#include <linux/smp.h>
extern void __delay(unsigned int loops);
extern void __ndelay(unsigned int ns);
extern void __udelay(unsigned int us);

#include <asm/compiler.h>
#include <asm/war.h>

static inline void __delay(unsigned long loops)
{
	if (sizeof(long) == 4)
		__asm__ __volatile__ (
		"	.set	noreorder				\n"
		"	.align	3					\n"
		"1:	bnez	%0, 1b					\n"
		"	subu	%0, 1					\n"
		"	.set	reorder					\n"
		: "=r" (loops)
		: "0" (loops));
	else if (sizeof(long) == 8 && !DADDI_WAR)
		__asm__ __volatile__ (
		"	.set	noreorder				\n"
		"	.align	3					\n"
		"1:	bnez	%0, 1b					\n"
		"	dsubu	%0, 1					\n"
		"	.set	reorder					\n"
		: "=r" (loops)
		: "0" (loops));
	else if (sizeof(long) == 8 && DADDI_WAR)
		__asm__ __volatile__ (
		"	.set	noreorder				\n"
		"	.align	3					\n"
		"1:	bnez	%0, 1b					\n"
		"	dsubu	%0, %2					\n"
		"	.set	reorder					\n"
		: "=r" (loops)
		: "0" (loops), "r" (1));
}


/*
 * Division by multiplication: you don't have to worry about
 * loss of precision.
 *
 * Use only for very small delays ( < 1 msec).  Should probably use a
 * lookup table, really, as the multiplications take much too long with
 * short delays.  This is a "reasonable" implementation, though (and the
 * first constant multiplications gets optimized away if the delay is
 * a constant)
 */

static inline void __udelay(unsigned long usecs, unsigned long lpj)
{
	unsigned long hi, lo;

	/*
	 * The rates of 128 is rounded wrongly by the catchall case
	 * for 64-bit.  Excessive precission?  Probably ...
	 */
#if defined(CONFIG_64BIT) && (HZ == 128)
	usecs *= 0x0008637bd05af6c7UL;		/* 2**64 / (1000000 / HZ) */
#elif defined(CONFIG_64BIT)
	usecs *= (0x8000000000000000UL / (500000 / HZ));
#else /* 32-bit junk follows here */
	usecs *= (unsigned long) (((0x8000000000000000ULL / (500000 / HZ)) +
	                           0x80000000ULL) >> 32);
#endif

	if (sizeof(long) == 4)
		__asm__("multu\t%2, %3"
		: "=h" (usecs), "=l" (lo)
		: "r" (usecs), "r" (lpj)
		: GCC_REG_ACCUM);
	else if (sizeof(long) == 8 && !R4000_WAR)
		__asm__("dmultu\t%2, %3"
		: "=h" (usecs), "=l" (lo)
		: "r" (usecs), "r" (lpj)
		: GCC_REG_ACCUM);
	else if (sizeof(long) == 8 && R4000_WAR)
		__asm__("dmultu\t%3, %4\n\tmfhi\t%0"
		: "=r" (usecs), "=h" (hi), "=l" (lo)
		: "r" (usecs), "r" (lpj)
		: GCC_REG_ACCUM);

	__delay(usecs);
}

#define __udelay_val cpu_data[raw_smp_processor_id()].udelay_val

#define udelay(usecs) __udelay((usecs), __udelay_val)
#define ndelay(ns) __udelay(ns)
#define udelay(us) __udelay(us)

/* make sure "usecs *= ..." in udelay do not overflow. */
#if HZ >= 1000
+1 −1
Original line number Diff line number Diff line
@@ -42,7 +42,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
	seq_printf(m, fmt, __cpu_name[n],
	                           (version >> 4) & 0x0f, version & 0x0f,
	                           (fp_vers >> 4) & 0x0f, fp_vers & 0x0f);
	seq_printf(m, "BogoMIPS\t\t: %lu.%02lu\n",
	seq_printf(m, "BogoMIPS\t\t: %u.%02u\n",
	              cpu_data[n].udelay_val / (500000/HZ),
	              (cpu_data[n].udelay_val / (5000/HZ)) % 100);
	seq_printf(m, "wait instruction\t: %s\n", cpu_wait ? "yes" : "no");
+2 −2
Original line number Diff line number Diff line
@@ -2,8 +2,8 @@
# Makefile for MIPS-specific library files..
#

lib-y	+= csum_partial.o memcpy.o memcpy-inatomic.o memset.o strlen_user.o \
	   strncpy_user.o strnlen_user.o uncached.o
lib-y	+= csum_partial.o delay.o memcpy.o memcpy-inatomic.o memset.o \
	   strlen_user.o strncpy_user.o strnlen_user.o uncached.o

obj-y			+= iomap.o
obj-$(CONFIG_PCI)	+= iomap-pci.o

arch/mips/lib/delay.c

0 → 100644
+56 −0
Original line number Diff line number Diff line
/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Copyright (C) 1994 by Waldorf Electronics
 * Copyright (C) 1995 - 2000, 01, 03 by Ralf Baechle
 * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
 * Copyright (C) 2007  Maciej W. Rozycki
 */
#include <linux/module.h>
#include <linux/param.h>
#include <linux/smp.h>

#include <asm/compiler.h>
#include <asm/war.h>

inline void __delay(unsigned int loops)
{
	__asm__ __volatile__ (
	"	.set	noreorder				\n"
	"	.align	3					\n"
	"1:	bnez	%0, 1b					\n"
	"	subu	%0, 1					\n"
	"	.set	reorder					\n"
	: "=r" (loops)
	: "0" (loops));
}
EXPORT_SYMBOL(__delay);

/*
 * Division by multiplication: you don't have to worry about
 * loss of precision.
 *
 * Use only for very small delays ( < 1 msec).  Should probably use a
 * lookup table, really, as the multiplications take much too long with
 * short delays.  This is a "reasonable" implementation, though (and the
 * first constant multiplications gets optimized away if the delay is
 * a constant)
 */

void __udelay(unsigned long us)
{
	unsigned int lpj = current_cpu_data.udelay_val;

	__delay((us * 0x000010c7 * HZ * lpj) >> 32);
}
EXPORT_SYMBOL(__udelay);

void __ndelay(unsigned long ns)
{
	unsigned int lpj = current_cpu_data.udelay_val;

	__delay((us * 0x00000005 * HZ * lpj) >> 32);
}
EXPORT_SYMBOL(__ndelay);