Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit b445e26c authored by David S. Miller
Browse files

[SPARC64]: Avoid membar instructions in delay slots.



In particular, avoid membar instructions in the delay
slot of a jmpl instruction.

UltraSPARC-I, II, IIi, and IIe have a bug, documented in
the UltraSPARC-IIi User's Manual, Appendix K, Erratum 51.

The long and short of it is that if the IMU unit misses
on a branch or jmpl, and there is a store buffer synchronizing
membar in the delay slot, the chip can stop fetching instructions.

If interrupts are enabled or some other trap is enabled, the
chip will unwedge itself, but performance will suffer.

We already had a workaround for this bug in a few spots, but
it's better to have the entire tree sanitized for this rule.

Signed-off-by: David S. Miller <davem@davemloft.net>
parent 020f46a3
Loading
Loading
Loading
Loading
+4 −2
Original line number Diff line number Diff line
@@ -271,8 +271,9 @@ cplus_fptrap_insn_1:
	fmuld		%f0, %f2, %f26
	faddd		%f0, %f2, %f28
	fmuld		%f0, %f2, %f30
	b,pt		%xcc, fpdis_exit
	membar		#Sync
	b,pt		%xcc, fpdis_exit
	 nop
2:	andcc		%g5, FPRS_DU, %g0
	bne,pt		%icc, 3f
	 fzero		%f32
@@ -301,8 +302,9 @@ cplus_fptrap_insn_2:
	fmuld		%f32, %f34, %f58
	faddd		%f32, %f34, %f60
	fmuld		%f32, %f34, %f62
	ba,pt		%xcc, fpdis_exit
	membar		#Sync
	ba,pt		%xcc, fpdis_exit
	 nop
3:	mov		SECONDARY_CONTEXT, %g3
	add		%g6, TI_FPREGS, %g1
	ldxa		[%g3] ASI_DMMU, %g5
+8 −4
Original line number Diff line number Diff line
@@ -32,8 +32,9 @@ static __inline__ int __sem_update_count(struct semaphore *sem, int incr)
"	add	%1, %4, %1\n"
"	cas	[%3], %0, %1\n"
"	cmp	%0, %1\n"
"	bne,pn	%%icc, 1b\n"
"	membar	#StoreLoad | #StoreStore\n"
"	bne,pn	%%icc, 1b\n"
"	 nop\n"
	: "=&r" (old_count), "=&r" (tmp), "=m" (sem->count)
	: "r" (&sem->count), "r" (incr), "m" (sem->count)
	: "cc");
@@ -71,8 +72,9 @@ void up(struct semaphore *sem)
"	cmp	%%g1, %%g7\n"
"	bne,pn	%%icc, 1b\n"
"	 addcc	%%g7, 1, %%g0\n"
"	ble,pn	%%icc, 3f\n"
"	membar	#StoreLoad | #StoreStore\n"
"	ble,pn	%%icc, 3f\n"
"	 nop\n"
"2:\n"
"	.subsection 2\n"
"3:	mov	%0, %%g1\n"
@@ -128,8 +130,9 @@ void __sched down(struct semaphore *sem)
"	cmp	%%g1, %%g7\n"
"	bne,pn	%%icc, 1b\n"
"	 cmp	%%g7, 1\n"
"	bl,pn	%%icc, 3f\n"
"	membar	#StoreLoad | #StoreStore\n"
"	bl,pn	%%icc, 3f\n"
"	 nop\n"
"2:\n"
"	.subsection 2\n"
"3:	mov	%0, %%g1\n"
@@ -233,8 +236,9 @@ int __sched down_interruptible(struct semaphore *sem)
"	cmp	%%g1, %%g7\n"
"	bne,pn	%%icc, 1b\n"
"	 cmp	%%g7, 1\n"
"	bl,pn	%%icc, 3f\n"
"	membar	#StoreLoad | #StoreStore\n"
"	bl,pn	%%icc, 3f\n"
"	 nop\n"
"2:\n"
"	.subsection 2\n"
"3:	mov	%2, %%g1\n"
+2 −1
Original line number Diff line number Diff line
@@ -98,8 +98,9 @@ startup_continue:

	sethi		%hi(prom_entry_lock), %g2
1:	ldstub		[%g2 + %lo(prom_entry_lock)], %g1
	brnz,pn		%g1, 1b
	membar		#StoreLoad | #StoreStore
	brnz,pn		%g1, 1b
	 nop

	sethi		%hi(p1275buf), %g2
	or		%g2, %lo(p1275buf), %g2
+53 −50
Original line number Diff line number Diff line
@@ -87,14 +87,17 @@
#define LOOP_CHUNK3(src, dest, len, branch_dest)		\
	MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest)

#define DO_SYNC			membar	#Sync;
#define STORE_SYNC(dest, fsrc)				\
	EX_ST(STORE_BLK(%fsrc, %dest));			\
	add			%dest, 0x40, %dest;
	add			%dest, 0x40, %dest;	\
	DO_SYNC

#define STORE_JUMP(dest, fsrc, target)			\
	EX_ST(STORE_BLK(%fsrc, %dest));			\
	add			%dest, 0x40, %dest;	\
	ba,pt			%xcc, target;
	ba,pt			%xcc, target;		\
	 nop;

#define FINISH_VISCHUNK(dest, f0, f1, left)	\
	subcc			%left, 8, %left;\
@@ -239,17 +242,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
	ba,pt		%xcc, 1b+4
	 faligndata	%f0, %f2, %f48
1:	FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
	STORE_SYNC(o0, f48) membar #Sync
	STORE_SYNC(o0, f48)
	FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
	STORE_JUMP(o0, f48, 40f) membar #Sync
	STORE_JUMP(o0, f48, 40f)
2:	FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
	STORE_SYNC(o0, f48) membar #Sync
	STORE_SYNC(o0, f48)
	FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
	STORE_JUMP(o0, f48, 48f) membar #Sync
	STORE_JUMP(o0, f48, 48f)
3:	FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
	STORE_SYNC(o0, f48) membar #Sync
	STORE_SYNC(o0, f48)
	FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
	STORE_JUMP(o0, f48, 56f) membar #Sync
	STORE_JUMP(o0, f48, 56f)

1:	FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -260,17 +263,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
	ba,pt		%xcc, 1b+4
	 faligndata	%f2, %f4, %f48
1:	FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
	STORE_SYNC(o0, f48) membar #Sync
	STORE_SYNC(o0, f48)
	FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
	STORE_JUMP(o0, f48, 41f) membar #Sync
	STORE_JUMP(o0, f48, 41f)
2:	FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
	STORE_SYNC(o0, f48) membar #Sync
	STORE_SYNC(o0, f48)
	FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
	STORE_JUMP(o0, f48, 49f) membar #Sync
	STORE_JUMP(o0, f48, 49f)
3:	FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
	STORE_SYNC(o0, f48) membar #Sync
	STORE_SYNC(o0, f48)
	FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
	STORE_JUMP(o0, f48, 57f) membar #Sync
	STORE_JUMP(o0, f48, 57f)

1:	FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -281,17 +284,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
	ba,pt		%xcc, 1b+4
	 faligndata	%f4, %f6, %f48
1:	FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
	STORE_SYNC(o0, f48) membar #Sync
	STORE_SYNC(o0, f48)
	FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
	STORE_JUMP(o0, f48, 42f) membar #Sync
	STORE_JUMP(o0, f48, 42f)
2:	FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
	STORE_SYNC(o0, f48) membar #Sync
	STORE_SYNC(o0, f48)
	FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
	STORE_JUMP(o0, f48, 50f) membar #Sync
	STORE_JUMP(o0, f48, 50f)
3:	FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
	STORE_SYNC(o0, f48) membar #Sync
	STORE_SYNC(o0, f48)
	FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
	STORE_JUMP(o0, f48, 58f) membar #Sync
	STORE_JUMP(o0, f48, 58f)

1:	FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -302,17 +305,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
	ba,pt		%xcc, 1b+4
	 faligndata	%f6, %f8, %f48
1:	FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
	STORE_SYNC(o0, f48) membar #Sync
	STORE_SYNC(o0, f48)
	FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)
	STORE_JUMP(o0, f48, 43f) membar #Sync
	STORE_JUMP(o0, f48, 43f)
2:	FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)
	STORE_SYNC(o0, f48) membar #Sync
	STORE_SYNC(o0, f48)
	FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
	STORE_JUMP(o0, f48, 51f) membar #Sync
	STORE_JUMP(o0, f48, 51f)
3:	FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
	STORE_SYNC(o0, f48) membar #Sync
	STORE_SYNC(o0, f48)
	FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
	STORE_JUMP(o0, f48, 59f) membar #Sync
	STORE_JUMP(o0, f48, 59f)

1:	FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -323,17 +326,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
	ba,pt		%xcc, 1b+4
	 faligndata	%f8, %f10, %f48
1:	FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
	STORE_SYNC(o0, f48) membar #Sync
	STORE_SYNC(o0, f48)
	FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
	STORE_JUMP(o0, f48, 44f) membar #Sync
	STORE_JUMP(o0, f48, 44f)
2:	FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
	STORE_SYNC(o0, f48) membar #Sync
	STORE_SYNC(o0, f48)
	FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
	STORE_JUMP(o0, f48, 52f) membar #Sync
	STORE_JUMP(o0, f48, 52f)
3:	FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
	STORE_SYNC(o0, f48) membar #Sync
	STORE_SYNC(o0, f48)
	FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
	STORE_JUMP(o0, f48, 60f) membar #Sync
	STORE_JUMP(o0, f48, 60f)

1:	FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -344,17 +347,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
	ba,pt		%xcc, 1b+4
	 faligndata	%f10, %f12, %f48
1:	FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
	STORE_SYNC(o0, f48) membar #Sync
	STORE_SYNC(o0, f48)
	FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
	STORE_JUMP(o0, f48, 45f) membar #Sync
	STORE_JUMP(o0, f48, 45f)
2:	FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
	STORE_SYNC(o0, f48) membar #Sync
	STORE_SYNC(o0, f48)
	FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
	STORE_JUMP(o0, f48, 53f) membar #Sync
	STORE_JUMP(o0, f48, 53f)
3:	FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
	STORE_SYNC(o0, f48) membar #Sync
	STORE_SYNC(o0, f48)
	FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
	STORE_JUMP(o0, f48, 61f) membar #Sync
	STORE_JUMP(o0, f48, 61f)

1:	FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -365,17 +368,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
	ba,pt		%xcc, 1b+4
	 faligndata	%f12, %f14, %f48
1:	FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
	STORE_SYNC(o0, f48) membar #Sync
	STORE_SYNC(o0, f48)
	FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
	STORE_JUMP(o0, f48, 46f) membar #Sync
	STORE_JUMP(o0, f48, 46f)
2:	FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
	STORE_SYNC(o0, f48) membar #Sync
	STORE_SYNC(o0, f48)
	FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
	STORE_JUMP(o0, f48, 54f) membar #Sync
	STORE_JUMP(o0, f48, 54f)
3:	FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
	STORE_SYNC(o0, f48) membar #Sync
	STORE_SYNC(o0, f48)
	FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
	STORE_JUMP(o0, f48, 62f) membar #Sync
	STORE_JUMP(o0, f48, 62f)

1:	FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -386,17 +389,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
	ba,pt		%xcc, 1b+4
	 faligndata	%f14, %f16, %f48
1:	FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
	STORE_SYNC(o0, f48) membar #Sync
	STORE_SYNC(o0, f48)
	FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
	STORE_JUMP(o0, f48, 47f) membar #Sync
	STORE_JUMP(o0, f48, 47f)
2:	FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
	STORE_SYNC(o0, f48) membar #Sync
	STORE_SYNC(o0, f48)
	FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
	STORE_JUMP(o0, f48, 55f) membar #Sync
	STORE_JUMP(o0, f48, 55f)
3:	FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
	STORE_SYNC(o0, f48) membar #Sync
	STORE_SYNC(o0, f48)
	FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
	STORE_JUMP(o0, f48, 63f) membar #Sync
	STORE_JUMP(o0, f48, 63f)

40:	FINISH_VISCHUNK(o0, f0,  f2,  g3)
41:	FINISH_VISCHUNK(o0, f2,  f4,  g3)
+13 −2
Original line number Diff line number Diff line
@@ -72,7 +72,11 @@ vis1: ldub [%g6 + TI_FPSAVED], %g3

	stda		%f48, [%g3 + %g1] ASI_BLK_P
5:	membar		#Sync
	jmpl		%g7 + %g0, %g0
	ba,pt		%xcc, 80f
	 nop

	.align		32
80:	jmpl		%g7 + %g0, %g0
	 nop

6:	ldub		[%g3 + TI_FPSAVED], %o5
@@ -87,8 +91,11 @@ vis1: ldub [%g6 + TI_FPSAVED], %g3
	stda		%f32, [%g2 + %g1] ASI_BLK_P
	stda		%f48, [%g3 + %g1] ASI_BLK_P
	membar		#Sync
	jmpl		%g7 + %g0, %g0
	ba,pt		%xcc, 80f
	 nop

	.align		32
80:	jmpl		%g7 + %g0, %g0
	 nop

	.align		32
@@ -126,6 +133,10 @@ VISenterhalf:
	stda		%f0, [%g2 + %g1] ASI_BLK_P
	stda		%f16, [%g3 + %g1] ASI_BLK_P
	membar		#Sync
	ba,pt		%xcc, 4f
	 nop

	.align		32
4:	and		%o5, FPRS_DU, %o5
	jmpl		%g7 + %g0, %g0
	 wr		%o5, FPRS_FEF, %fprs
Loading