sh: Support denormalization on SH-4 FPU. (c8c0a1ab) · Commits · e / devices / android_kernel_xiaomi_markw

arch/sh/kernel/cpu/sh4/Makefile

+1 −1

Original line number	Original line	Diff line number	Diff line
	@@ -5,7 +5,7 @@
	obj-y := probe.o common.o		obj-y := probe.o common.o
	common-y += $(addprefix ../sh3/, entry.o ex.o)		common-y += $(addprefix ../sh3/, entry.o ex.o)

	obj-$(CONFIG_SH_FPU) += fpu.o		obj-$(CONFIG_SH_FPU) += fpu.o softfloat.o
	obj-$(CONFIG_SH_STORE_QUEUES) += sq.o		obj-$(CONFIG_SH_STORE_QUEUES) += sq.o

	# CPU subtype setup		# CPU subtype setup

arch/sh/kernel/cpu/sh4/fpu.c

+334 −180

Original line number	Original line	Diff line number	Diff line
	/* $Id: fpu.c,v 1.4 2004/01/13 05:52:11 kkojima Exp $		/*
	*
	* linux/arch/sh/kernel/fpu.c
	*
	* Save/restore floating point context for signal handlers.		* Save/restore floating point context for signal handlers.
	*		*
	* This file is subject to the terms and conditions of the GNU General Public		* This file is subject to the terms and conditions of the GNU General Public
	@@ -9,15 +6,16 @@
	* for more details.		* for more details.
	*		*
	* Copyright (C) 1999, 2000 Kaz Kojima & Niibe Yutaka		* Copyright (C) 1999, 2000 Kaz Kojima & Niibe Yutaka
			* Copyright (C) 2006 ST Microelectronics Ltd. (denorm support)
	*		*
	* FIXME! These routines can be optimized in big endian case.		* FIXME! These routines have not been tested for big endian case.
	*/		*/

	#include <linux/sched.h>		#include <linux/sched.h>
	#include <linux/signal.h>		#include <linux/signal.h>
			#include <linux/io.h>
			#include <asm/cpu/fpu.h>
	#include <asm/processor.h>		#include <asm/processor.h>
	#include <asm/system.h>		#include <asm/system.h>
	#include <asm/io.h>

	/* The PR (precision) bit in the FP Status Register must be clear when		/* The PR (precision) bit in the FP Status Register must be clear when
	* an frchg instruction is executed, otherwise the instruction is undefined.		* an frchg instruction is executed, otherwise the instruction is undefined.
	@@ -25,14 +23,26 @@
	*/		*/

	#define FPSCR_RCHG 0x00000000		#define FPSCR_RCHG 0x00000000
			/*
			 * Arithmetic helpers implemented outside this file (presumably in
			 * softfloat.o, which the Makefile hunk of this commit adds -- confirm).
			 * ieee_fpe_handler() calls them with the raw contents of the saved FP
			 * registers: the float64_* variants take and return a 64-bit value
			 * assembled from a register pair, the float32_* variants a single
			 * register word.
			 */
			extern unsigned long long float64_div(unsigned long long a,
			unsigned long long b);
			extern unsigned long int float32_div(unsigned long int a, unsigned long int b);
			extern unsigned long long float64_mul(unsigned long long a,
			unsigned long long b);
			extern unsigned long int float32_mul(unsigned long int a, unsigned long int b);
			extern unsigned long long float64_add(unsigned long long a,
			unsigned long long b);
			extern unsigned long int float32_add(unsigned long int a, unsigned long int b);
			extern unsigned long long float64_sub(unsigned long long a,
			unsigned long long b);
			extern unsigned long int float32_sub(unsigned long int a, unsigned long int b);

			/*
			 * Exception flags accumulated through float_raise().  The fpu_error
			 * trap handler clears this before calling ieee_fpe_handler() and then
			 * ORs the result into the task's saved FPSCR.
			 * NOTE(review): this is a single file-scope variable, not per-task
			 * state -- confirm nothing can interleave two emulations.
			 */
			static unsigned int fpu_exception_flags;

	/*		/*
	* Save FPU registers onto task structure.		* Save FPU registers onto task structure.
	* Assume called with FPU enabled (SR.FD=0).		* Assume called with FPU enabled (SR.FD=0).
	*/		*/
	void		void save_fpu(struct task_struct tsk, struct pt_regs regs)
	save_fpu(struct task_struct tsk, struct pt_regs regs)
	{		{
	unsigned long dummy;		unsigned long dummy;

	@@ -75,19 +85,16 @@ save_fpu(struct task_struct tsk, struct pt_regs regs)
	"fmov.s fr2, @-%0\n\t"		"fmov.s fr2, @-%0\n\t"
	"fmov.s fr1, @-%0\n\t"		"fmov.s fr1, @-%0\n\t"
	"fmov.s fr0, @-%0\n\t"		"fmov.s fr0, @-%0\n\t"
	"lds %3, fpscr\n\t"		"lds %3, fpscr\n\t":"=r" (dummy)
	: "=r" (dummy)
	:"0"((char *)(&tsk->thread.fpu.hard.status)),		:"0"((char *)(&tsk->thread.fpu.hard.status)),
	"r" (FPSCR_RCHG),		"r"(FPSCR_RCHG), "r"(FPSCR_INIT)
	"r" (FPSCR_INIT)
	:"memory");		:"memory");

	disable_fpu();		disable_fpu();
	release_fpu(regs);		release_fpu(regs);
	}		}

	static void		static void restore_fpu(struct task_struct *tsk)
	restore_fpu(struct task_struct *tsk)
	{		{
	unsigned long dummy;		unsigned long dummy;

	@@ -141,8 +148,7 @@ restore_fpu(struct task_struct *tsk)
	* double precision represents signaling NANS.		* double precision represents signaling NANS.
	*/		*/

	static void		static void fpu_init(void)
	fpu_init(void)
	{		{
	enable_fpu();		enable_fpu();
	asm volatile ( "lds %0, fpul\n\t"		asm volatile ( "lds %0, fpul\n\t"
	@@ -194,8 +200,7 @@ fpu_init(void)
	* @fpu: Pointer to sh_fpu_hard structure		* @fpu: Pointer to sh_fpu_hard structure
	* @n: Index to FP register		* @n: Index to FP register
	*/		*/
	static void		static void denormal_to_double(struct sh_fpu_hard_struct *fpu, int n)
	denormal_to_double (struct sh_fpu_hard_struct *fpu, int n)
	{		{
	unsigned long du, dl;		unsigned long du, dl;
	unsigned long x = fpu->fpul;		unsigned long x = fpu->fpul;
	@@ -223,8 +228,7 @@ denormal_to_double (struct sh_fpu_hard_struct *fpu, int n)
	*		*
	* Returns 1 when it's handled (should not cause exception).		* Returns 1 when it's handled (should not cause exception).
	*/		*/
	static int		static int ieee_fpe_handler(struct pt_regs *regs)
	ieee_fpe_handler (struct pt_regs *regs)
	{		{
	unsigned short insn = (unsigned short )regs->pc;		unsigned short insn = (unsigned short )regs->pc;
	unsigned short finsn;		unsigned short finsn;
	@@ -233,35 +237,42 @@ ieee_fpe_handler (struct pt_regs *regs)
	(insn >> 12) & 0xf,		(insn >> 12) & 0xf,
	(insn >> 8) & 0xf,		(insn >> 8) & 0xf,
	(insn >> 4) & 0xf,		(insn >> 4) & 0xf,
	insn & 0xf};		insn & 0xf
			};

			if (nib[0] == 0xb \|\| (nib[0] == 0x4 && nib[2] == 0x0 && nib[3] == 0xb))
			regs->pr = regs->pc + 4; /* bsr & jsr */

	if (nib[0] == 0xb \|\|		if (nib[0] == 0xa \|\| nib[0] == 0xb) {
	(nib[0] == 0x4 && nib[2] == 0x0 && nib[3] == 0xb)) /* bsr & jsr */		/* bra & bsr */
	regs->pr = regs->pc + 4;
	if (nib[0] == 0xa \|\| nib[0] == 0xb) { /* bra & bsr */
	nextpc = regs->pc + 4 + ((short)((insn & 0xfff) << 4) >> 3);		nextpc = regs->pc + 4 + ((short)((insn & 0xfff) << 4) >> 3);
	finsn = (unsigned short )(regs->pc + 2);		finsn = (unsigned short )(regs->pc + 2);
	} else if (nib[0] == 0x8 && nib[1] == 0xd) { /* bt/s */		} else if (nib[0] == 0x8 && nib[1] == 0xd) {
			/* bt/s */
	if (regs->sr & 1)		if (regs->sr & 1)
	nextpc = regs->pc + 4 + ((char)(insn & 0xff) << 1);		nextpc = regs->pc + 4 + ((char)(insn & 0xff) << 1);
	else		else
	nextpc = regs->pc + 4;		nextpc = regs->pc + 4;
	finsn = (unsigned short )(regs->pc + 2);		finsn = (unsigned short )(regs->pc + 2);
	} else if (nib[0] == 0x8 && nib[1] == 0xf) { /* bf/s */		} else if (nib[0] == 0x8 && nib[1] == 0xf) {
			/* bf/s */
	if (regs->sr & 1)		if (regs->sr & 1)
	nextpc = regs->pc + 4;		nextpc = regs->pc + 4;
	else		else
	nextpc = regs->pc + 4 + ((char)(insn & 0xff) << 1);		nextpc = regs->pc + 4 + ((char)(insn & 0xff) << 1);
	finsn = (unsigned short )(regs->pc + 2);		finsn = (unsigned short )(regs->pc + 2);
	} else if (nib[0] == 0x4 && nib[3] == 0xb &&		} else if (nib[0] == 0x4 && nib[3] == 0xb &&
	(nib[2] == 0x0 \|\| nib[2] == 0x2)) { /* jmp & jsr */		(nib[2] == 0x0 \|\| nib[2] == 0x2)) {
			/* jmp & jsr */
	nextpc = regs->regs[nib[1]];		nextpc = regs->regs[nib[1]];
	finsn = (unsigned short )(regs->pc + 2);		finsn = (unsigned short )(regs->pc + 2);
	} else if (nib[0] == 0x0 && nib[3] == 0x3 &&		} else if (nib[0] == 0x0 && nib[3] == 0x3 &&
	(nib[2] == 0x0 \|\| nib[2] == 0x2)) { /* braf & bsrf */		(nib[2] == 0x0 \|\| nib[2] == 0x2)) {
			/* braf & bsrf */
	nextpc = regs->pc + 4 + regs->regs[nib[1]];		nextpc = regs->pc + 4 + regs->regs[nib[1]];
	finsn = (unsigned short )(regs->pc + 2);		finsn = (unsigned short )(regs->pc + 2);
	} else if (insn == 0x000b) { /* rts */		} else if (insn == 0x000b) {
			/* rts */
	nextpc = regs->pr;		nextpc = regs->pr;
	finsn = (unsigned short )(regs->pc + 2);		finsn = (unsigned short )(regs->pc + 2);
	} else {		} else {
	@@ -269,21 +280,139 @@ ieee_fpe_handler (struct pt_regs *regs)
	finsn = insn;		finsn = insn;
	}		}

	if ((finsn & 0xf1ff) == 0xf0ad) { /* fcnvsd */		if ((finsn & 0xf1ff) == 0xf0ad) {
			/* fcnvsd */
	struct task_struct *tsk = current;		struct task_struct *tsk = current;

	save_fpu(tsk, regs);		save_fpu(tsk, regs);
	if ((tsk->thread.fpu.hard.fpscr & (1 << 17))) {		if ((tsk->thread.fpu.hard.fpscr & FPSCR_CAUSE_ERROR))
	/* FPU error */		/* FPU error */
	denormal_to_double(&tsk->thread.fpu.hard,		denormal_to_double(&tsk->thread.fpu.hard,
	(finsn >> 8) & 0xf);		(finsn >> 8) & 0xf);
	tsk->thread.fpu.hard.fpscr &=		else
	~(FPSCR_CAUSE_MASK \| FPSCR_FLAG_MASK);		return 0;
	grab_fpu(regs);
	restore_fpu(tsk);		regs->pc = nextpc;
	set_tsk_thread_flag(tsk, TIF_USEDFPU);		return 1;
			} else if ((finsn & 0xf00f) == 0xf002) {
			/* fmul */
			struct task_struct *tsk = current;
			int fpscr;
			int n, m, prec;
			unsigned int hx, hy;

			n = (finsn >> 8) & 0xf;
			m = (finsn >> 4) & 0xf;
			hx = tsk->thread.fpu.hard.fp_regs[n];
			hy = tsk->thread.fpu.hard.fp_regs[m];
			fpscr = tsk->thread.fpu.hard.fpscr;
			prec = fpscr & FPSCR_DBL_PRECISION;

			if ((fpscr & FPSCR_CAUSE_ERROR)
			&& (prec && ((hx & 0x7fffffff) < 0x00100000
			\|\| (hy & 0x7fffffff) < 0x00100000))) {
			long long llx, lly;

			/* FPU error because of denormal (doubles) */
			llx = ((long long)hx << 32)
			\| tsk->thread.fpu.hard.fp_regs[n + 1];
			lly = ((long long)hy << 32)
			\| tsk->thread.fpu.hard.fp_regs[m + 1];
			llx = float64_mul(llx, lly);
			tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
			tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff;
			} else if ((fpscr & FPSCR_CAUSE_ERROR)
			&& (!prec && ((hx & 0x7fffffff) < 0x00800000
			\|\| (hy & 0x7fffffff) < 0x00800000))) {
			/* FPU error because of denormal (floats) */
			hx = float32_mul(hx, hy);
			tsk->thread.fpu.hard.fp_regs[n] = hx;
	} else		} else
	force_sig(SIGFPE, tsk);		return 0;

			regs->pc = nextpc;
			return 1;
			} else if ((finsn & 0xf00e) == 0xf000) {
			/* fadd, fsub */
			struct task_struct *tsk = current;
			int fpscr;
			int n, m, prec;
			unsigned int hx, hy;

			n = (finsn >> 8) & 0xf;
			m = (finsn >> 4) & 0xf;
			hx = tsk->thread.fpu.hard.fp_regs[n];
			hy = tsk->thread.fpu.hard.fp_regs[m];
			fpscr = tsk->thread.fpu.hard.fpscr;
			prec = fpscr & FPSCR_DBL_PRECISION;

			if ((fpscr & FPSCR_CAUSE_ERROR)
			&& (prec && ((hx & 0x7fffffff) < 0x00100000
			\|\| (hy & 0x7fffffff) < 0x00100000))) {
			long long llx, lly;

			/* FPU error because of denormal (doubles) */
			llx = ((long long)hx << 32)
			\| tsk->thread.fpu.hard.fp_regs[n + 1];
			lly = ((long long)hy << 32)
			\| tsk->thread.fpu.hard.fp_regs[m + 1];
			if ((finsn & 0xf00f) == 0xf000)
			llx = float64_add(llx, lly);
			else
			llx = float64_sub(llx, lly);
			tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
			tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff;
			} else if ((fpscr & FPSCR_CAUSE_ERROR)
			&& (!prec && ((hx & 0x7fffffff) < 0x00800000
			\|\| (hy & 0x7fffffff) < 0x00800000))) {
			/* FPU error because of denormal (floats) */
			if ((finsn & 0xf00f) == 0xf000)
			hx = float32_add(hx, hy);
			else
			hx = float32_sub(hx, hy);
			tsk->thread.fpu.hard.fp_regs[n] = hx;
			} else
			return 0;

			regs->pc = nextpc;
			return 1;
			} else if ((finsn & 0xf003) == 0xf003) {
			/* fdiv */
			struct task_struct *tsk = current;
			int fpscr;
			int n, m, prec;
			unsigned int hx, hy;

			n = (finsn >> 8) & 0xf;
			m = (finsn >> 4) & 0xf;
			hx = tsk->thread.fpu.hard.fp_regs[n];
			hy = tsk->thread.fpu.hard.fp_regs[m];
			fpscr = tsk->thread.fpu.hard.fpscr;
			prec = fpscr & FPSCR_DBL_PRECISION;

			if ((fpscr & FPSCR_CAUSE_ERROR)
			&& (prec && ((hx & 0x7fffffff) < 0x00100000
			\|\| (hy & 0x7fffffff) < 0x00100000))) {
			long long llx, lly;

			/* FPU error because of denormal (doubles) */
			llx = ((long long)hx << 32)
			\| tsk->thread.fpu.hard.fp_regs[n + 1];
			lly = ((long long)hy << 32)
			\| tsk->thread.fpu.hard.fp_regs[m + 1];

			llx = float64_div(llx, lly);

			tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
			tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff;
			} else if ((fpscr & FPSCR_CAUSE_ERROR)
			&& (!prec && ((hx & 0x7fffffff) < 0x00800000
			\|\| (hy & 0x7fffffff) < 0x00800000))) {
			/* FPU error because of denormal (floats) */
			hx = float32_div(hx, hy);
			tsk->thread.fpu.hard.fp_regs[n] = hx;
			} else
			return 0;

	regs->pc = nextpc;		regs->pc = nextpc;
	return 1;		return 1;
	@@ -292,16 +421,41 @@ ieee_fpe_handler (struct pt_regs *regs)
	return 0;		return 0;
	}		}

			/*
			 * float_raise - record floating-point exception flags.
			 * @flags: bits to OR into the file-scope fpu_exception_flags accumulator.
			 *
			 * The fpu_error trap handler zeroes fpu_exception_flags before calling
			 * ieee_fpe_handler() and afterwards merges the accumulated bits into the
			 * task's saved FPSCR.  Presumably called by the softfloat routines while
			 * they emulate an operation -- confirm against softfloat.c.
			 */
			void float_raise(unsigned int flags)
			{
				fpu_exception_flags |= flags;
			}

			/*
			 * float_rounding_mode - rounding mode of the current task.
			 *
			 * Returns FPSCR_ROUNDING_MODE() applied to the FPSCR value saved in
			 * current->thread.fpu.hard.
			 */
			int float_rounding_mode(void)
			{
				return FPSCR_ROUNDING_MODE(current->thread.fpu.hard.fpscr);
			}

	BUILD_TRAP_HANDLER(fpu_error)		BUILD_TRAP_HANDLER(fpu_error)
	{		{
	struct task_struct *tsk = current;		struct task_struct *tsk = current;
	TRAP_HANDLER_DECL;		TRAP_HANDLER_DECL;

	if (ieee_fpe_handler(regs))		save_fpu(tsk, regs);
			fpu_exception_flags = 0;
			if (ieee_fpe_handler(regs)) {
			tsk->thread.fpu.hard.fpscr &=
			~(FPSCR_CAUSE_MASK \| FPSCR_FLAG_MASK);
			tsk->thread.fpu.hard.fpscr \|= fpu_exception_flags;
			/* Set the FPSCR flag as well as cause bits - simply
			* replicate the cause */
			tsk->thread.fpu.hard.fpscr \|= (fpu_exception_flags >> 10);
			grab_fpu(regs);
			restore_fpu(tsk);
			set_tsk_thread_flag(tsk, TIF_USEDFPU);
			if ((((tsk->thread.fpu.hard.fpscr & FPSCR_ENABLE_MASK) >> 7) &
			(fpu_exception_flags >> 2)) == 0) {
	return;		return;
			}
			}

	regs->pc += 2;
	save_fpu(tsk, regs);
	force_sig(SIGFPE, tsk);		force_sig(SIGFPE, tsk);
	}		}