Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit c8c0a1ab, authored by Stuart Menefy, committed by Paul Mundt
Browse files

sh: Support denormalization on SH-4 FPU.

parent 453ec9c1
Loading
Loading
Loading
Loading
+1 −1
Original line number Original line Diff line number Diff line
@@ -5,7 +5,7 @@
obj-y	:= probe.o common.o
obj-y	:= probe.o common.o
common-y	+= $(addprefix ../sh3/, entry.o ex.o)
common-y	+= $(addprefix ../sh3/, entry.o ex.o)


obj-$(CONFIG_SH_FPU)			+= fpu.o
obj-$(CONFIG_SH_FPU)			+= fpu.o softfloat.o
obj-$(CONFIG_SH_STORE_QUEUES)		+= sq.o
obj-$(CONFIG_SH_STORE_QUEUES)		+= sq.o


# CPU subtype setup
# CPU subtype setup
+334 −180
Original line number Original line Diff line number Diff line
/* $Id: fpu.c,v 1.4 2004/01/13 05:52:11 kkojima Exp $
/*
 *
 * linux/arch/sh/kernel/fpu.c
 *
 * Save/restore floating point context for signal handlers.
 * Save/restore floating point context for signal handlers.
 *
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * This file is subject to the terms and conditions of the GNU General Public
@@ -9,15 +6,16 @@
 * for more details.
 * for more details.
 *
 *
 * Copyright (C) 1999, 2000  Kaz Kojima & Niibe Yutaka
 * Copyright (C) 1999, 2000  Kaz Kojima & Niibe Yutaka
 * Copyright (C) 2006  ST Microelectronics Ltd. (denorm support)
 *
 *
 * FIXME! These routines can be optimized in big endian case.
 * FIXME! These routines have not been tested for big endian case.
 */
 */

#include <linux/sched.h>
#include <linux/sched.h>
#include <linux/signal.h>
#include <linux/signal.h>
#include <linux/io.h>
#include <asm/cpu/fpu.h>
#include <asm/processor.h>
#include <asm/processor.h>
#include <asm/system.h>
#include <asm/system.h>
#include <asm/io.h>


/* The PR (precision) bit in the FP Status Register must be clear when
/* The PR (precision) bit in the FP Status Register must be clear when
 * an frchg instruction is executed, otherwise the instruction is undefined.
 * an frchg instruction is executed, otherwise the instruction is undefined.
@@ -25,14 +23,26 @@
 */
 */


#define FPSCR_RCHG 0x00000000
#define FPSCR_RCHG 0x00000000
extern unsigned long long float64_div(unsigned long long a,
				      unsigned long long b);
extern unsigned long int float32_div(unsigned long int a, unsigned long int b);
extern unsigned long long float64_mul(unsigned long long a,
				      unsigned long long b);
extern unsigned long int float32_mul(unsigned long int a, unsigned long int b);
extern unsigned long long float64_add(unsigned long long a,
				      unsigned long long b);
extern unsigned long int float32_add(unsigned long int a, unsigned long int b);
extern unsigned long long float64_sub(unsigned long long a,
				      unsigned long long b);
extern unsigned long int float32_sub(unsigned long int a, unsigned long int b);


static unsigned int fpu_exception_flags;


/*
/*
 * Save FPU registers onto task structure.
 * Save FPU registers onto task structure.
 * Assume called with FPU enabled (SR.FD=0).
 * Assume called with FPU enabled (SR.FD=0).
 */
 */
void
void save_fpu(struct task_struct *tsk, struct pt_regs *regs)
save_fpu(struct task_struct *tsk, struct pt_regs *regs)
{
{
	unsigned long dummy;
	unsigned long dummy;


@@ -75,19 +85,16 @@ save_fpu(struct task_struct *tsk, struct pt_regs *regs)
		      "fmov.s	fr2, @-%0\n\t"
		      "fmov.s	fr2, @-%0\n\t"
		      "fmov.s	fr1, @-%0\n\t"
		      "fmov.s	fr1, @-%0\n\t"
		      "fmov.s	fr0, @-%0\n\t"
		      "fmov.s	fr0, @-%0\n\t"
		     "lds	%3, fpscr\n\t"
		      "lds	%3, fpscr\n\t":"=r" (dummy)
		     : "=r" (dummy)
		      :"0"((char *)(&tsk->thread.fpu.hard.status)),
		      :"0"((char *)(&tsk->thread.fpu.hard.status)),
		       "r" (FPSCR_RCHG),
		      "r"(FPSCR_RCHG), "r"(FPSCR_INIT)
		       "r" (FPSCR_INIT)
		      :"memory");
		      :"memory");


	disable_fpu();
	disable_fpu();
	release_fpu(regs);
	release_fpu(regs);
}
}


static void
static void restore_fpu(struct task_struct *tsk)
restore_fpu(struct task_struct *tsk)
{
{
	unsigned long dummy;
	unsigned long dummy;


@@ -141,8 +148,7 @@ restore_fpu(struct task_struct *tsk)
 * double precision represents signaling NANS.
 * double precision represents signaling NANS.
 */
 */


static void
static void fpu_init(void)
fpu_init(void)
{
{
	enable_fpu();
	enable_fpu();
	asm volatile (	"lds	%0, fpul\n\t"
	asm volatile (	"lds	%0, fpul\n\t"
@@ -194,8 +200,7 @@ fpu_init(void)
 *      @fpu: Pointer to sh_fpu_hard structure
 *      @fpu: Pointer to sh_fpu_hard structure
 *      @n: Index to FP register
 *      @n: Index to FP register
 */
 */
static void
static void denormal_to_double(struct sh_fpu_hard_struct *fpu, int n)
denormal_to_double (struct sh_fpu_hard_struct *fpu, int n)
{
{
	unsigned long du, dl;
	unsigned long du, dl;
	unsigned long x = fpu->fpul;
	unsigned long x = fpu->fpul;
@@ -223,8 +228,7 @@ denormal_to_double (struct sh_fpu_hard_struct *fpu, int n)
 *
 *
 *	Returns 1 when it's handled (should not cause exception).
 *	Returns 1 when it's handled (should not cause exception).
 */
 */
static int
static int ieee_fpe_handler(struct pt_regs *regs)
ieee_fpe_handler (struct pt_regs *regs)
{
{
	unsigned short insn = *(unsigned short *)regs->pc;
	unsigned short insn = *(unsigned short *)regs->pc;
	unsigned short finsn;
	unsigned short finsn;
@@ -233,35 +237,42 @@ ieee_fpe_handler (struct pt_regs *regs)
		(insn >> 12) & 0xf,
		(insn >> 12) & 0xf,
		(insn >> 8) & 0xf,
		(insn >> 8) & 0xf,
		(insn >> 4) & 0xf,
		(insn >> 4) & 0xf,
		insn & 0xf};
		insn & 0xf
	};

	if (nib[0] == 0xb || (nib[0] == 0x4 && nib[2] == 0x0 && nib[3] == 0xb))
		regs->pr = regs->pc + 4;  /* bsr & jsr */


	if (nib[0] == 0xb ||
	if (nib[0] == 0xa || nib[0] == 0xb) {
	    (nib[0] == 0x4 && nib[2] == 0x0 && nib[3] == 0xb)) /* bsr & jsr */
		/* bra & bsr */
		regs->pr = regs->pc + 4;
	if (nib[0] == 0xa || nib[0] == 0xb) { /* bra & bsr */
		nextpc = regs->pc + 4 + ((short)((insn & 0xfff) << 4) >> 3);
		nextpc = regs->pc + 4 + ((short)((insn & 0xfff) << 4) >> 3);
		finsn = *(unsigned short *)(regs->pc + 2);
		finsn = *(unsigned short *)(regs->pc + 2);
	} else if (nib[0] == 0x8 && nib[1] == 0xd) { /* bt/s */
	} else if (nib[0] == 0x8 && nib[1] == 0xd) {
		/* bt/s */
		if (regs->sr & 1)
		if (regs->sr & 1)
			nextpc = regs->pc + 4 + ((char)(insn & 0xff) << 1);
			nextpc = regs->pc + 4 + ((char)(insn & 0xff) << 1);
		else
		else
			nextpc = regs->pc + 4;
			nextpc = regs->pc + 4;
		finsn = *(unsigned short *)(regs->pc + 2);
		finsn = *(unsigned short *)(regs->pc + 2);
	} else if (nib[0] == 0x8 && nib[1] == 0xf) { /* bf/s */
	} else if (nib[0] == 0x8 && nib[1] == 0xf) {
		/* bf/s */
		if (regs->sr & 1)
		if (regs->sr & 1)
			nextpc = regs->pc + 4;
			nextpc = regs->pc + 4;
		else
		else
			nextpc = regs->pc + 4 + ((char)(insn & 0xff) << 1);
			nextpc = regs->pc + 4 + ((char)(insn & 0xff) << 1);
		finsn = *(unsigned short *)(regs->pc + 2);
		finsn = *(unsigned short *)(regs->pc + 2);
	} else if (nib[0] == 0x4 && nib[3] == 0xb &&
	} else if (nib[0] == 0x4 && nib[3] == 0xb &&
		 (nib[2] == 0x0 || nib[2] == 0x2)) { /* jmp & jsr */
		   (nib[2] == 0x0 || nib[2] == 0x2)) {
		/* jmp & jsr */
		nextpc = regs->regs[nib[1]];
		nextpc = regs->regs[nib[1]];
		finsn = *(unsigned short *)(regs->pc + 2);
		finsn = *(unsigned short *)(regs->pc + 2);
	} else if (nib[0] == 0x0 && nib[3] == 0x3 &&
	} else if (nib[0] == 0x0 && nib[3] == 0x3 &&
		 (nib[2] == 0x0 || nib[2] == 0x2)) { /* braf & bsrf */
		   (nib[2] == 0x0 || nib[2] == 0x2)) {
		/* braf & bsrf */
		nextpc = regs->pc + 4 + regs->regs[nib[1]];
		nextpc = regs->pc + 4 + regs->regs[nib[1]];
		finsn = *(unsigned short *)(regs->pc + 2);
		finsn = *(unsigned short *)(regs->pc + 2);
	} else if (insn == 0x000b) { /* rts */
	} else if (insn == 0x000b) {
		/* rts */
		nextpc = regs->pr;
		nextpc = regs->pr;
		finsn = *(unsigned short *)(regs->pc + 2);
		finsn = *(unsigned short *)(regs->pc + 2);
	} else {
	} else {
@@ -269,21 +280,139 @@ ieee_fpe_handler (struct pt_regs *regs)
		finsn = insn;
		finsn = insn;
	}
	}


	if ((finsn & 0xf1ff) == 0xf0ad) { /* fcnvsd */
	if ((finsn & 0xf1ff) == 0xf0ad) {
		/* fcnvsd */
		struct task_struct *tsk = current;
		struct task_struct *tsk = current;


		save_fpu(tsk, regs);
		save_fpu(tsk, regs);
		if ((tsk->thread.fpu.hard.fpscr & (1 << 17))) {
		if ((tsk->thread.fpu.hard.fpscr & FPSCR_CAUSE_ERROR))
			/* FPU error */
			/* FPU error */
			denormal_to_double(&tsk->thread.fpu.hard,
			denormal_to_double(&tsk->thread.fpu.hard,
					   (finsn >> 8) & 0xf);
					   (finsn >> 8) & 0xf);
			tsk->thread.fpu.hard.fpscr &=
		else
				~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK);
			return 0;
			grab_fpu(regs);

			restore_fpu(tsk);
		regs->pc = nextpc;
			set_tsk_thread_flag(tsk, TIF_USEDFPU);
		return 1;
	} else if ((finsn & 0xf00f) == 0xf002) {
		/* fmul */
		struct task_struct *tsk = current;
		int fpscr;
		int n, m, prec;
		unsigned int hx, hy;

		n = (finsn >> 8) & 0xf;
		m = (finsn >> 4) & 0xf;
		hx = tsk->thread.fpu.hard.fp_regs[n];
		hy = tsk->thread.fpu.hard.fp_regs[m];
		fpscr = tsk->thread.fpu.hard.fpscr;
		prec = fpscr & FPSCR_DBL_PRECISION;

		if ((fpscr & FPSCR_CAUSE_ERROR)
		    && (prec && ((hx & 0x7fffffff) < 0x00100000
				 || (hy & 0x7fffffff) < 0x00100000))) {
			long long llx, lly;

			/* FPU error because of denormal (doubles) */
			llx = ((long long)hx << 32)
			    | tsk->thread.fpu.hard.fp_regs[n + 1];
			lly = ((long long)hy << 32)
			    | tsk->thread.fpu.hard.fp_regs[m + 1];
			llx = float64_mul(llx, lly);
			tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
			tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff;
		} else if ((fpscr & FPSCR_CAUSE_ERROR)
			   && (!prec && ((hx & 0x7fffffff) < 0x00800000
					 || (hy & 0x7fffffff) < 0x00800000))) {
			/* FPU error because of denormal (floats) */
			hx = float32_mul(hx, hy);
			tsk->thread.fpu.hard.fp_regs[n] = hx;
		} else
		} else
			force_sig(SIGFPE, tsk);
			return 0;

		regs->pc = nextpc;
		return 1;
	} else if ((finsn & 0xf00e) == 0xf000) {
		/* fadd, fsub */
		struct task_struct *tsk = current;
		int fpscr;
		int n, m, prec;
		unsigned int hx, hy;

		n = (finsn >> 8) & 0xf;
		m = (finsn >> 4) & 0xf;
		hx = tsk->thread.fpu.hard.fp_regs[n];
		hy = tsk->thread.fpu.hard.fp_regs[m];
		fpscr = tsk->thread.fpu.hard.fpscr;
		prec = fpscr & FPSCR_DBL_PRECISION;

		if ((fpscr & FPSCR_CAUSE_ERROR)
		    && (prec && ((hx & 0x7fffffff) < 0x00100000
				 || (hy & 0x7fffffff) < 0x00100000))) {
			long long llx, lly;

			/* FPU error because of denormal (doubles) */
			llx = ((long long)hx << 32)
			    | tsk->thread.fpu.hard.fp_regs[n + 1];
			lly = ((long long)hy << 32)
			    | tsk->thread.fpu.hard.fp_regs[m + 1];
			if ((finsn & 0xf00f) == 0xf000)
				llx = float64_add(llx, lly);
			else
				llx = float64_sub(llx, lly);
			tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
			tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff;
		} else if ((fpscr & FPSCR_CAUSE_ERROR)
			   && (!prec && ((hx & 0x7fffffff) < 0x00800000
					 || (hy & 0x7fffffff) < 0x00800000))) {
			/* FPU error because of denormal (floats) */
			if ((finsn & 0xf00f) == 0xf000)
				hx = float32_add(hx, hy);
			else
				hx = float32_sub(hx, hy);
			tsk->thread.fpu.hard.fp_regs[n] = hx;
		} else
			return 0;

		regs->pc = nextpc;
		return 1;
	} else if ((finsn & 0xf003) == 0xf003) {
		/* fdiv */
		struct task_struct *tsk = current;
		int fpscr;
		int n, m, prec;
		unsigned int hx, hy;

		n = (finsn >> 8) & 0xf;
		m = (finsn >> 4) & 0xf;
		hx = tsk->thread.fpu.hard.fp_regs[n];
		hy = tsk->thread.fpu.hard.fp_regs[m];
		fpscr = tsk->thread.fpu.hard.fpscr;
		prec = fpscr & FPSCR_DBL_PRECISION;

		if ((fpscr & FPSCR_CAUSE_ERROR)
		    && (prec && ((hx & 0x7fffffff) < 0x00100000
				 || (hy & 0x7fffffff) < 0x00100000))) {
			long long llx, lly;

			/* FPU error because of denormal (doubles) */
			llx = ((long long)hx << 32)
			    | tsk->thread.fpu.hard.fp_regs[n + 1];
			lly = ((long long)hy << 32)
			    | tsk->thread.fpu.hard.fp_regs[m + 1];

			llx = float64_div(llx, lly);

			tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
			tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff;
		} else if ((fpscr & FPSCR_CAUSE_ERROR)
			   && (!prec && ((hx & 0x7fffffff) < 0x00800000
					 || (hy & 0x7fffffff) < 0x00800000))) {
			/* FPU error because of denormal (floats) */
			hx = float32_div(hx, hy);
			tsk->thread.fpu.hard.fp_regs[n] = hx;
		} else
			return 0;


		regs->pc = nextpc;
		regs->pc = nextpc;
		return 1;
		return 1;
@@ -292,16 +421,41 @@ ieee_fpe_handler (struct pt_regs *regs)
	return 0;
	return 0;
}
}


/*
 * Accumulate exception flag bits into the file-level fpu_exception_flags
 * word.  Bits that are already set stay set; nothing is cleared here.
 */
void float_raise(unsigned int flags)
{
	fpu_exception_flags = fpu_exception_flags | flags;
}

/*
 * Return the value FPSCR_ROUNDING_MODE() extracts from the current task's
 * saved FPSCR (thread.fpu.hard.fpscr).
 */
int float_rounding_mode(void)
{
	return FPSCR_ROUNDING_MODE(current->thread.fpu.hard.fpscr);
}

BUILD_TRAP_HANDLER(fpu_error)
BUILD_TRAP_HANDLER(fpu_error)
{
{
	struct task_struct *tsk = current;
	struct task_struct *tsk = current;
	TRAP_HANDLER_DECL;
	TRAP_HANDLER_DECL;


	if (ieee_fpe_handler(regs))
	save_fpu(tsk, regs);
	fpu_exception_flags = 0;
	if (ieee_fpe_handler(regs)) {
		tsk->thread.fpu.hard.fpscr &=
		    ~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK);
		tsk->thread.fpu.hard.fpscr |= fpu_exception_flags;
		/* Set the FPSCR flag as well as cause bits - simply
		 * replicate the cause */
		tsk->thread.fpu.hard.fpscr |= (fpu_exception_flags >> 10);
		grab_fpu(regs);
		restore_fpu(tsk);
		set_tsk_thread_flag(tsk, TIF_USEDFPU);
		if ((((tsk->thread.fpu.hard.fpscr & FPSCR_ENABLE_MASK) >> 7) &
		     (fpu_exception_flags >> 2)) == 0) {
			return;
			return;
		}
	}


	regs->pc += 2;
	save_fpu(tsk, regs);
	force_sig(SIGFPE, tsk);
	force_sig(SIGFPE, tsk);
}
}