Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit de79f7b9 authored by Paul Mackerras's avatar Paul Mackerras Committed by Benjamin Herrenschmidt
Browse files

powerpc: Put FP/VSX and VR state into structures



This creates new 'thread_fp_state' and 'thread_vr_state' structures
to store FP/VSX state (including FPSCR) and Altivec/VSX state
(including VSCR), and uses them in the thread_struct.  In the
thread_fp_state, the FPRs and VSRs are represented as u64 rather
than double, since we rarely perform floating-point computations
on the values, and this will enable the structures to be used
in KVM code as well.  Similarly FPSCR is now a u64 rather than
a structure of two 32-bit values.

This takes the offsets out of the macros such as SAVE_32FPRS,
REST_32FPRS, etc.  This enables the same macros to be used for normal
and transactional state, enabling us to delete the transactional
versions of the macros.   This also removes the unused do_load_up_fpu
and do_load_up_altivec, which were in fact buggy since they didn't
create large enough stack frames to account for the fact that
load_up_fpu and load_up_altivec are not designed to be called from C
and assume that their caller's stack frame is an interrupt frame.

Signed-off-by: default avatarPaul Mackerras <paulus@samba.org>
Signed-off-by: default avatarBenjamin Herrenschmidt <benh@kernel.crashing.org>
parent 8e0a1611
Loading
Loading
Loading
Loading
+6 −89
Original line number Diff line number Diff line
@@ -98,123 +98,40 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
#define REST_8GPRS(n, base)	REST_4GPRS(n, base); REST_4GPRS(n+4, base)
#define REST_10GPRS(n, base)	REST_8GPRS(n, base); REST_2GPRS(n+8, base)

#define SAVE_FPR(n, base)	stfd	n,THREAD_FPR0+8*TS_FPRWIDTH*(n)(base)
#define SAVE_FPR(n, base)	stfd	n,8*TS_FPRWIDTH*(n)(base)
#define SAVE_2FPRS(n, base)	SAVE_FPR(n, base); SAVE_FPR(n+1, base)
#define SAVE_4FPRS(n, base)	SAVE_2FPRS(n, base); SAVE_2FPRS(n+2, base)
#define SAVE_8FPRS(n, base)	SAVE_4FPRS(n, base); SAVE_4FPRS(n+4, base)
#define SAVE_16FPRS(n, base)	SAVE_8FPRS(n, base); SAVE_8FPRS(n+8, base)
#define SAVE_32FPRS(n, base)	SAVE_16FPRS(n, base); SAVE_16FPRS(n+16, base)
#define REST_FPR(n, base)	lfd	n,THREAD_FPR0+8*TS_FPRWIDTH*(n)(base)
#define REST_FPR(n, base)	lfd	n,8*TS_FPRWIDTH*(n)(base)
#define REST_2FPRS(n, base)	REST_FPR(n, base); REST_FPR(n+1, base)
#define REST_4FPRS(n, base)	REST_2FPRS(n, base); REST_2FPRS(n+2, base)
#define REST_8FPRS(n, base)	REST_4FPRS(n, base); REST_4FPRS(n+4, base)
#define REST_16FPRS(n, base)	REST_8FPRS(n, base); REST_8FPRS(n+8, base)
#define REST_32FPRS(n, base)	REST_16FPRS(n, base); REST_16FPRS(n+16, base)

#define SAVE_VR(n,b,base)	li b,THREAD_VR0+(16*(n));  stvx n,base,b
#define SAVE_VR(n,b,base)	li b,16*(n);  stvx n,base,b
#define SAVE_2VRS(n,b,base)	SAVE_VR(n,b,base); SAVE_VR(n+1,b,base)
#define SAVE_4VRS(n,b,base)	SAVE_2VRS(n,b,base); SAVE_2VRS(n+2,b,base)
#define SAVE_8VRS(n,b,base)	SAVE_4VRS(n,b,base); SAVE_4VRS(n+4,b,base)
#define SAVE_16VRS(n,b,base)	SAVE_8VRS(n,b,base); SAVE_8VRS(n+8,b,base)
#define SAVE_32VRS(n,b,base)	SAVE_16VRS(n,b,base); SAVE_16VRS(n+16,b,base)
#define REST_VR(n,b,base)	li b,THREAD_VR0+(16*(n)); lvx n,base,b
#define REST_VR(n,b,base)	li b,16*(n); lvx n,base,b
#define REST_2VRS(n,b,base)	REST_VR(n,b,base); REST_VR(n+1,b,base)
#define REST_4VRS(n,b,base)	REST_2VRS(n,b,base); REST_2VRS(n+2,b,base)
#define REST_8VRS(n,b,base)	REST_4VRS(n,b,base); REST_4VRS(n+4,b,base)
#define REST_16VRS(n,b,base)	REST_8VRS(n,b,base); REST_8VRS(n+8,b,base)
#define REST_32VRS(n,b,base)	REST_16VRS(n,b,base); REST_16VRS(n+16,b,base)

/* Save/restore FPRs, VRs and VSRs from their checkpointed backups in
 * thread_struct:
 */
#define SAVE_FPR_TRANSACT(n, base)	stfd n,THREAD_TRANSACT_FPR0+	\
					8*TS_FPRWIDTH*(n)(base)
#define SAVE_2FPRS_TRANSACT(n, base)	SAVE_FPR_TRANSACT(n, base);	\
					SAVE_FPR_TRANSACT(n+1, base)
#define SAVE_4FPRS_TRANSACT(n, base)	SAVE_2FPRS_TRANSACT(n, base);	\
					SAVE_2FPRS_TRANSACT(n+2, base)
#define SAVE_8FPRS_TRANSACT(n, base)	SAVE_4FPRS_TRANSACT(n, base);	\
					SAVE_4FPRS_TRANSACT(n+4, base)
#define SAVE_16FPRS_TRANSACT(n, base)	SAVE_8FPRS_TRANSACT(n, base);	\
					SAVE_8FPRS_TRANSACT(n+8, base)
#define SAVE_32FPRS_TRANSACT(n, base)	SAVE_16FPRS_TRANSACT(n, base);	\
					SAVE_16FPRS_TRANSACT(n+16, base)

#define REST_FPR_TRANSACT(n, base)	lfd	n,THREAD_TRANSACT_FPR0+	\
					8*TS_FPRWIDTH*(n)(base)
#define REST_2FPRS_TRANSACT(n, base)	REST_FPR_TRANSACT(n, base);	\
					REST_FPR_TRANSACT(n+1, base)
#define REST_4FPRS_TRANSACT(n, base)	REST_2FPRS_TRANSACT(n, base);	\
					REST_2FPRS_TRANSACT(n+2, base)
#define REST_8FPRS_TRANSACT(n, base)	REST_4FPRS_TRANSACT(n, base);	\
					REST_4FPRS_TRANSACT(n+4, base)
#define REST_16FPRS_TRANSACT(n, base)	REST_8FPRS_TRANSACT(n, base);	\
					REST_8FPRS_TRANSACT(n+8, base)
#define REST_32FPRS_TRANSACT(n, base)	REST_16FPRS_TRANSACT(n, base);	\
					REST_16FPRS_TRANSACT(n+16, base)


#define SAVE_VR_TRANSACT(n,b,base)	li b,THREAD_TRANSACT_VR0+(16*(n)); \
					stvx n,b,base
#define SAVE_2VRS_TRANSACT(n,b,base)	SAVE_VR_TRANSACT(n,b,base);	\
					SAVE_VR_TRANSACT(n+1,b,base)
#define SAVE_4VRS_TRANSACT(n,b,base)	SAVE_2VRS_TRANSACT(n,b,base);	\
					SAVE_2VRS_TRANSACT(n+2,b,base)
#define SAVE_8VRS_TRANSACT(n,b,base)	SAVE_4VRS_TRANSACT(n,b,base);	\
					SAVE_4VRS_TRANSACT(n+4,b,base)
#define SAVE_16VRS_TRANSACT(n,b,base)	SAVE_8VRS_TRANSACT(n,b,base);	\
					SAVE_8VRS_TRANSACT(n+8,b,base)
#define SAVE_32VRS_TRANSACT(n,b,base)	SAVE_16VRS_TRANSACT(n,b,base);	\
					SAVE_16VRS_TRANSACT(n+16,b,base)

#define REST_VR_TRANSACT(n,b,base)	li b,THREAD_TRANSACT_VR0+(16*(n)); \
					lvx n,b,base
#define REST_2VRS_TRANSACT(n,b,base)	REST_VR_TRANSACT(n,b,base);	\
					REST_VR_TRANSACT(n+1,b,base)
#define REST_4VRS_TRANSACT(n,b,base)	REST_2VRS_TRANSACT(n,b,base);	\
					REST_2VRS_TRANSACT(n+2,b,base)
#define REST_8VRS_TRANSACT(n,b,base)	REST_4VRS_TRANSACT(n,b,base);	\
					REST_4VRS_TRANSACT(n+4,b,base)
#define REST_16VRS_TRANSACT(n,b,base)	REST_8VRS_TRANSACT(n,b,base);	\
					REST_8VRS_TRANSACT(n+8,b,base)
#define REST_32VRS_TRANSACT(n,b,base)	REST_16VRS_TRANSACT(n,b,base);	\
					REST_16VRS_TRANSACT(n+16,b,base)


#define SAVE_VSR_TRANSACT(n,b,base)	li b,THREAD_TRANSACT_VSR0+(16*(n)); \
					STXVD2X(n,R##base,R##b)
#define SAVE_2VSRS_TRANSACT(n,b,base)	SAVE_VSR_TRANSACT(n,b,base);	\
	                                SAVE_VSR_TRANSACT(n+1,b,base)
#define SAVE_4VSRS_TRANSACT(n,b,base)	SAVE_2VSRS_TRANSACT(n,b,base);	\
	                                SAVE_2VSRS_TRANSACT(n+2,b,base)
#define SAVE_8VSRS_TRANSACT(n,b,base)	SAVE_4VSRS_TRANSACT(n,b,base);	\
	                                SAVE_4VSRS_TRANSACT(n+4,b,base)
#define SAVE_16VSRS_TRANSACT(n,b,base)	SAVE_8VSRS_TRANSACT(n,b,base);	\
	                                SAVE_8VSRS_TRANSACT(n+8,b,base)
#define SAVE_32VSRS_TRANSACT(n,b,base)	SAVE_16VSRS_TRANSACT(n,b,base);	\
	                                SAVE_16VSRS_TRANSACT(n+16,b,base)

#define REST_VSR_TRANSACT(n,b,base)	li b,THREAD_TRANSACT_VSR0+(16*(n)); \
					LXVD2X(n,R##base,R##b)
#define REST_2VSRS_TRANSACT(n,b,base)	REST_VSR_TRANSACT(n,b,base);    \
	                                REST_VSR_TRANSACT(n+1,b,base)
#define REST_4VSRS_TRANSACT(n,b,base)	REST_2VSRS_TRANSACT(n,b,base);	\
	                                REST_2VSRS_TRANSACT(n+2,b,base)
#define REST_8VSRS_TRANSACT(n,b,base)	REST_4VSRS_TRANSACT(n,b,base);	\
	                                REST_4VSRS_TRANSACT(n+4,b,base)
#define REST_16VSRS_TRANSACT(n,b,base)	REST_8VSRS_TRANSACT(n,b,base);	\
	                                REST_8VSRS_TRANSACT(n+8,b,base)
#define REST_32VSRS_TRANSACT(n,b,base)	REST_16VSRS_TRANSACT(n,b,base);	\
	                                REST_16VSRS_TRANSACT(n+16,b,base)

/* Save the lower 32 VSRs in the thread VSR region */
#define SAVE_VSR(n,b,base)	li b,THREAD_VSR0+(16*(n));  STXVD2X(n,R##base,R##b)
#define SAVE_VSR(n,b,base)	li b,16*(n);  STXVD2X(n,R##base,R##b)
#define SAVE_2VSRS(n,b,base)	SAVE_VSR(n,b,base); SAVE_VSR(n+1,b,base)
#define SAVE_4VSRS(n,b,base)	SAVE_2VSRS(n,b,base); SAVE_2VSRS(n+2,b,base)
#define SAVE_8VSRS(n,b,base)	SAVE_4VSRS(n,b,base); SAVE_4VSRS(n+4,b,base)
#define SAVE_16VSRS(n,b,base)	SAVE_8VSRS(n,b,base); SAVE_8VSRS(n+8,b,base)
#define SAVE_32VSRS(n,b,base)	SAVE_16VSRS(n,b,base); SAVE_16VSRS(n+16,b,base)
#define REST_VSR(n,b,base)	li b,THREAD_VSR0+(16*(n)); LXVD2X(n,R##base,R##b)
#define REST_VSR(n,b,base)	li b,16*(n); LXVD2X(n,R##base,R##b)
#define REST_2VSRS(n,b,base)	REST_VSR(n,b,base); REST_VSR(n+1,b,base)
#define REST_4VSRS(n,b,base)	REST_2VSRS(n,b,base); REST_2VSRS(n+2,b,base)
#define REST_8VSRS(n,b,base)	REST_4VSRS(n,b,base); REST_4VSRS(n+4,b,base)
+18 −22
Original line number Diff line number Diff line
@@ -144,8 +144,20 @@ typedef struct {

#define TS_FPROFFSET 0
#define TS_VSRLOWOFFSET 1
#define TS_FPR(i) fpr[i][TS_FPROFFSET]
#define TS_TRANS_FPR(i) transact_fpr[i][TS_FPROFFSET]
#define TS_FPR(i) fp_state.fpr[i][TS_FPROFFSET]
#define TS_TRANS_FPR(i) transact_fp.fpr[i][TS_FPROFFSET]

/* FP and VSX 0-31 register set */
struct thread_fp_state {
	u64	fpr[32][TS_FPRWIDTH] __attribute__((aligned(16)));
	u64	fpscr;		/* Floating point status */
};

/* Complete AltiVec register set including VSCR */
struct thread_vr_state {
	vector128	vr[32] __attribute__((aligned(16)));
	vector128	vscr __attribute__((aligned(16)));
};

struct thread_struct {
	unsigned long	ksp;		/* Kernel stack pointer */
@@ -198,13 +210,7 @@ struct thread_struct {
	unsigned long	dvc2;
#endif
#endif
	/* FP and VSX 0-31 register set */
	double		fpr[32][TS_FPRWIDTH] __attribute__((aligned(16)));
	struct {

		unsigned int pad;
		unsigned int val;	/* Floating point status */
	} fpscr;
	struct thread_fp_state	fp_state;
	int		fpexc_mode;	/* floating-point exception mode */
	unsigned int	align_ctl;	/* alignment handling control */
#ifdef CONFIG_PPC64
@@ -222,10 +228,7 @@ struct thread_struct {
	struct arch_hw_breakpoint hw_brk; /* info on the hardware breakpoint */
	unsigned long	trap_nr;	/* last trap # on this thread */
#ifdef CONFIG_ALTIVEC
	/* Complete AltiVec register set */
	vector128	vr[32] __attribute__((aligned(16)));
	/* AltiVec status */
	vector128	vscr __attribute__((aligned(16)));
	struct thread_vr_state vr_state;
	unsigned long	vrsave;
	int		used_vr;	/* set if process has used altivec */
#endif /* CONFIG_ALTIVEC */
@@ -262,13 +265,8 @@ struct thread_struct {
	 * transact_fpr[] is the new set of transactional values.
	 * VRs work the same way.
	 */
	double		transact_fpr[32][TS_FPRWIDTH];
	struct {
		unsigned int pad;
		unsigned int val;	/* Floating point status */
	} transact_fpscr;
	vector128	transact_vr[32] __attribute__((aligned(16)));
	vector128	transact_vscr __attribute__((aligned(16)));
	struct thread_fp_state transact_fp;
	struct thread_vr_state transact_vr;
	unsigned long	transact_vrsave;
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
@@ -322,8 +320,6 @@ struct thread_struct {
	.ksp = INIT_SP, \
	.regs = (struct pt_regs *)INIT_SP - 1, /* XXX bogus, I think */ \
	.fs = KERNEL_DS, \
	.fpr = {{0}}, \
	.fpscr = { .val = 0, }, \
	.fpexc_mode = 0, \
	.ppr = INIT_PPR, \
}
+1 −1
Original line number Diff line number Diff line
@@ -125,7 +125,7 @@
#define FP_EX_DIVZERO         (1 << (31 - 5))
#define FP_EX_INEXACT         (1 << (31 - 6))

#define __FPU_FPSCR	(current->thread.fpscr.val)
#define __FPU_FPSCR	(current->thread.fp_state.fpscr)

/* We only actually write to the destination register
 * if exceptions signalled (if any) will not trap.
+3 −3
Original line number Diff line number Diff line
@@ -660,7 +660,7 @@ static int emulate_vsx(unsigned char __user *addr, unsigned int reg,
	if (reg < 32)
		ptr = (char *) &current->thread.TS_FPR(reg);
	else
		ptr = (char *) &current->thread.vr[reg - 32];
		ptr = (char *) &current->thread.vr_state.vr[reg - 32];

	lptr = (unsigned long *) ptr;

@@ -897,7 +897,7 @@ int fix_alignment(struct pt_regs *regs)
				return -EFAULT;
		}
	} else if (flags & F) {
		data.dd = current->thread.TS_FPR(reg);
		data.ll = current->thread.TS_FPR(reg);
		if (flags & S) {
			/* Single-precision FP store requires conversion... */
#ifdef CONFIG_PPC_FPU
@@ -975,7 +975,7 @@ int fix_alignment(struct pt_regs *regs)
		if (unlikely(ret))
			return -EFAULT;
	} else if (flags & F)
		current->thread.TS_FPR(reg) = data.dd;
		current->thread.TS_FPR(reg) = data.ll;
	else
		regs->gpr[reg] = data.ll;

+8 −17
Original line number Diff line number Diff line
@@ -90,16 +90,15 @@ int main(void)
	DEFINE(THREAD_NORMSAVES, offsetof(struct thread_struct, normsave[0]));
#endif
	DEFINE(THREAD_FPEXC_MODE, offsetof(struct thread_struct, fpexc_mode));
	DEFINE(THREAD_FPR0, offsetof(struct thread_struct, fpr[0]));
	DEFINE(THREAD_FPSCR, offsetof(struct thread_struct, fpscr));
	DEFINE(THREAD_FPSTATE, offsetof(struct thread_struct, fp_state));
	DEFINE(FPSTATE_FPSCR, offsetof(struct thread_fp_state, fpscr));
#ifdef CONFIG_ALTIVEC
	DEFINE(THREAD_VR0, offsetof(struct thread_struct, vr[0]));
	DEFINE(THREAD_VRSTATE, offsetof(struct thread_struct, vr_state));
	DEFINE(THREAD_VRSAVE, offsetof(struct thread_struct, vrsave));
	DEFINE(THREAD_VSCR, offsetof(struct thread_struct, vscr));
	DEFINE(THREAD_USED_VR, offsetof(struct thread_struct, used_vr));
	DEFINE(VRSTATE_VSCR, offsetof(struct thread_vr_state, vscr));
#endif /* CONFIG_ALTIVEC */
#ifdef CONFIG_VSX
	DEFINE(THREAD_VSR0, offsetof(struct thread_struct, fpr));
	DEFINE(THREAD_USED_VSR, offsetof(struct thread_struct, used_vsr));
#endif /* CONFIG_VSX */
#ifdef CONFIG_PPC64
@@ -143,20 +142,12 @@ int main(void)
	DEFINE(THREAD_TM_PPR, offsetof(struct thread_struct, tm_ppr));
	DEFINE(THREAD_TM_DSCR, offsetof(struct thread_struct, tm_dscr));
	DEFINE(PT_CKPT_REGS, offsetof(struct thread_struct, ckpt_regs));
	DEFINE(THREAD_TRANSACT_VR0, offsetof(struct thread_struct,
					 transact_vr[0]));
	DEFINE(THREAD_TRANSACT_VSCR, offsetof(struct thread_struct,
					  transact_vscr));
	DEFINE(THREAD_TRANSACT_VRSTATE, offsetof(struct thread_struct,
						 transact_vr));
	DEFINE(THREAD_TRANSACT_VRSAVE, offsetof(struct thread_struct,
					    transact_vrsave));
	DEFINE(THREAD_TRANSACT_FPR0, offsetof(struct thread_struct,
					  transact_fpr[0]));
	DEFINE(THREAD_TRANSACT_FPSCR, offsetof(struct thread_struct,
					   transact_fpscr));
#ifdef CONFIG_VSX
	DEFINE(THREAD_TRANSACT_VSR0, offsetof(struct thread_struct,
					  transact_fpr[0]));
#endif
	DEFINE(THREAD_TRANSACT_FPSTATE, offsetof(struct thread_struct,
						 transact_fp));
	/* Local pt_regs on stack for Transactional Memory funcs. */
	DEFINE(TM_FRAME_SIZE, STACK_FRAME_OVERHEAD +
	       sizeof(struct pt_regs) + 16);
Loading