Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit fbb871e2 authored by Max Filippov
Browse files

xtensa: clean up word alignment macros in assembly code



Remove duplicate definitions of ALIGN/src_b/__src_b and SSA8/ssa8/__ssa8
from assembly sources and put single definition into asm/asmmacro.h

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
parent 0013aceb
Loading
Loading
Loading
Loading
+33 −0
Original line number Diff line number Diff line
@@ -158,4 +158,37 @@
	.previous				\
97:


/*
 * Extract unaligned word that is split between two registers w0 and w1
 * into r regardless of machine endianness. SAR must be loaded with the
 * starting bit of the word (see __ssa8).
 */

	.macro __src_b	r, w0, w1
#ifdef __XTENSA_EB__
		src	\r, \w0, \w1	/* big-endian: lower-addressed word w0 supplies the high bits */
#else
		src	\r, \w1, \w0	/* little-endian: swap operands so w0 supplies the low bits */
#endif
	.endm

/*
 * Load 2 lowest address bits of r into SAR for __src_b to extract unaligned
 * word starting at r from two registers loaded from consecutive aligned
 * addresses covering r regardless of machine endianness.
 *
 *      r   0   1   2   3
 * LE SAR   0   8  16  24
 * BE SAR  32  24  16   8
 */

	.macro __ssa8	r
#ifdef __XTENSA_EB__
		ssa8b	\r	/* big-endian: shift amount counted from the byte-0 (MS) end */
#else
		ssa8l	\r	/* little-endian: shift amount counted from the byte-0 (LS) end */
#endif
	.endm

#endif /* _XTENSA_ASMMACRO_H */
+1 −4
Original line number Diff line number Diff line
@@ -19,6 +19,7 @@
#include <linux/linkage.h>
#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/asmmacro.h>
#include <asm/processor.h>

#if XCHAL_UNALIGNED_LOAD_EXCEPTION || XCHAL_UNALIGNED_STORE_EXCEPTION
@@ -66,8 +67,6 @@
#define	INSN_T		24
#define	INSN_OP1	16

.macro __src_b	r, w0, w1;	src	\r, \w0, \w1;	.endm
.macro __ssa8	r;		ssa8b	\r;		.endm
.macro __ssa8r	r;		ssa8l	\r;		.endm
.macro __sh	r, s;		srl	\r, \s;		.endm
.macro __sl	r, s;		sll	\r, \s;		.endm
@@ -81,8 +80,6 @@
#define	INSN_T		4
#define	INSN_OP1	12

.macro __src_b	r, w0, w1;	src	\r, \w1, \w0;	.endm
.macro __ssa8	r;		ssa8l	\r;		.endm
.macro __ssa8r	r;		ssa8b	\r;		.endm
.macro __sh	r, s;		sll	\r, \s;		.endm
.macro __sl	r, s;		srl	\r, \s;		.endm
+17 −32
Original line number Diff line number Diff line
@@ -10,22 +10,7 @@
 */

#include <variant/core.h>

	.macro	src_b	r, w0, w1
#ifdef __XTENSA_EB__
	src	\r, \w0, \w1
#else
	src	\r, \w1, \w0
#endif
	.endm

	.macro	ssa8	r
#ifdef __XTENSA_EB__
	ssa8b	\r
#else
	ssa8l	\r
#endif
	.endm
#include <asm/asmmacro.h>

/*
 * void *memcpy(void *dst, const void *src, size_t len);
@@ -209,7 +194,7 @@ memcpy:
.Lsrcunaligned:
	_beqz	a4, .Ldone	# avoid loading anything for zero-length copies
	# copy 16 bytes per iteration for word-aligned dst and unaligned src
	ssa8	a3		# set shift amount from byte offset
	__ssa8	a3		# set shift amount from byte offset

/* set to 1 when running on ISS (simulator) with the
   lint or ferret client, or 0 to save a few cycles */
@@ -229,16 +214,16 @@ memcpy:
.Loop2:
	l32i	a7, a3,  4
	l32i	a8, a3,  8
	src_b	a6, a6, a7
	__src_b	a6, a6, a7
	s32i	a6, a5,  0
	l32i	a9, a3, 12
	src_b	a7, a7, a8
	__src_b	a7, a7, a8
	s32i	a7, a5,  4
	l32i	a6, a3, 16
	src_b	a8, a8, a9
	__src_b	a8, a8, a9
	s32i	a8, a5,  8
	addi	a3, a3, 16
	src_b	a9, a9, a6
	__src_b	a9, a9, a6
	s32i	a9, a5, 12
	addi	a5, a5, 16
#if !XCHAL_HAVE_LOOPS
@@ -249,10 +234,10 @@ memcpy:
	# copy 8 bytes
	l32i	a7, a3,  4
	l32i	a8, a3,  8
	src_b	a6, a6, a7
	__src_b	a6, a6, a7
	s32i	a6, a5,  0
	addi	a3, a3,  8
	src_b	a7, a7, a8
	__src_b	a7, a7, a8
	s32i	a7, a5,  4
	addi	a5, a5,  8
	mov	a6, a8
@@ -261,7 +246,7 @@ memcpy:
	# copy 4 bytes
	l32i	a7, a3,  4
	addi	a3, a3,  4
	src_b	a6, a6, a7
	__src_b	a6, a6, a7
	s32i	a6, a5,  0
	addi	a5, a5,  4
	mov	a6, a7
@@ -485,7 +470,7 @@ memmove:
.Lbacksrcunaligned:
	_beqz	a4, .Lbackdone	# avoid loading anything for zero-length copies
	# copy 16 bytes per iteration for word-aligned dst and unaligned src
	ssa8	a3		# set shift amount from byte offset
	__ssa8	a3		# set shift amount from byte offset
#define SIM_CHECKS_ALIGNMENT	1	/* set to 1 when running on ISS with
					 * the lint or ferret client, or 0
					 * to save a few cycles */
@@ -506,15 +491,15 @@ memmove:
	l32i	a7, a3, 12
	l32i	a8, a3,  8
	addi	a5, a5, -16
	src_b	a6, a7, a6
	__src_b	a6, a7, a6
	s32i	a6, a5, 12
	l32i	a9, a3,  4
	src_b	a7, a8, a7
	__src_b	a7, a8, a7
	s32i	a7, a5,  8
	l32i	a6, a3,  0
	src_b	a8, a9, a8
	__src_b	a8, a9, a8
	s32i	a8, a5,  4
	src_b	a9, a6, a9
	__src_b	a9, a6, a9
	s32i	a9, a5,  0
#if !XCHAL_HAVE_LOOPS
	bne	a3, a10, .backLoop2 # continue loop if a3:src != a10:src_start
@@ -526,9 +511,9 @@ memmove:
	l32i	a7, a3,  4
	l32i	a8, a3,  0
	addi	a5, a5, -8
	src_b	a6, a7, a6
	__src_b	a6, a7, a6
	s32i	a6, a5,  4
	src_b	a7, a8, a7
	__src_b	a7, a8, a7
	s32i	a7, a5,  0
	mov	a6, a8
.Lback12:
@@ -537,7 +522,7 @@ memmove:
	addi	a3, a3, -4
	l32i	a7, a3,  0
	addi	a5, a5, -4
	src_b	a6, a7, a6
	__src_b	a6, a7, a6
	s32i	a6, a5,  0
	mov	a6, a7
.Lback13:
+8 −16
Original line number Diff line number Diff line
@@ -56,14 +56,6 @@
#include <variant/core.h>
#include <asm/asmmacro.h>

#ifdef __XTENSA_EB__
#define ALIGN(R, W0, W1) src	R, W0, W1
#define SSA8(R)	ssa8b R
#else
#define ALIGN(R, W0, W1) src	R, W1, W0
#define SSA8(R)	ssa8l R
#endif

	.text
	.align	4
	.global	__xtensa_copy_user
@@ -81,7 +73,7 @@ __xtensa_copy_user:
				# per iteration
	movi	a8, 3		  # if source is also aligned,
	bnone	a3, a8, .Laligned # then use word copy
	SSA8(	a3)		# set shift amount from byte offset
	__ssa8	a3		# set shift amount from byte offset
	bnez	a4, .Lsrcunaligned
	movi	a2, 0		# return success for len==0
	retw
@@ -220,16 +212,16 @@ EX(10f) l32i a6, a3, 0 # load first word
.Loop2:
EX(10f)	l32i	a7, a3,  4
EX(10f)	l32i	a8, a3,  8
	ALIGN(	a6, a6, a7)
	__src_b	a6, a6, a7
EX(10f)	s32i	a6, a5,  0
EX(10f)	l32i	a9, a3, 12
	ALIGN(	a7, a7, a8)
	__src_b	a7, a7, a8
EX(10f)	s32i	a7, a5,  4
EX(10f)	l32i	a6, a3, 16
	ALIGN(	a8, a8, a9)
	__src_b	a8, a8, a9
EX(10f)	s32i	a8, a5,  8
	addi	a3, a3, 16
	ALIGN(	a9, a9, a6)
	__src_b	a9, a9, a6
EX(10f)	s32i	a9, a5, 12
	addi	a5, a5, 16
#if !XCHAL_HAVE_LOOPS
@@ -240,10 +232,10 @@ EX(10f) s32i a9, a5, 12
	# copy 8 bytes
EX(10f)	l32i	a7, a3,  4
EX(10f)	l32i	a8, a3,  8
	ALIGN(	a6, a6, a7)
	__src_b	a6, a6, a7
EX(10f)	s32i	a6, a5,  0
	addi	a3, a3,  8
	ALIGN(	a7, a7, a8)
	__src_b	a7, a7, a8
EX(10f)	s32i	a7, a5,  4
	addi	a5, a5,  8
	mov	a6, a8
@@ -252,7 +244,7 @@ EX(10f) s32i a7, a5, 4
	# copy 4 bytes
EX(10f)	l32i	a7, a3,  4
	addi	a3, a3,  4
	ALIGN(	a6, a6, a7)
	__src_b	a6, a6, a7
EX(10f)	s32i	a6, a5,  0
	addi	a5, a5,  4
	mov	a6, a7