powerpc: word-at-a-time optimization for 64-bit Little Endian (d0cebfa6) · Commits · e / devices / android_kernel_fairphone_FP4

arch/powerpc/include/asm/word-at-a-time.h

+32 −25

Original line number	Diff line number	Diff line
		@@ -42,13 +42,6 @@ static inline bool has_zero(unsigned long val, unsigned long *data, const struct

		#else

		/*
		* This is largely generic for little-endian machines, but the
		* optimal byte mask counting is probably going to be something
		* that is architecture-specific. If you have a reliably fast
		* bit count instruction, that might be better than the multiply
		* and shift, for example.
		*/
		/*
		 * Pair of read-only word-sized constants, one_bits and high_bits, handed
		 * to the word-at-a-time helpers in this header.
		 */
		struct word_at_a_time {
			const unsigned long one_bits;
			const unsigned long high_bits;
		};
		@@ -57,19 +50,32 @@ struct word_at_a_time {

		#ifdef CONFIG_64BIT

		/*
		* Jan Achrenius on G+: microoptimized version of
		* the simpler "(mask & ONEBYTES) * ONEBYTES >> 56"
		* that works for the bytemasks without having to
		* mask them first.
		*/
		static inline long count_masked_bytes(unsigned long mask)
		/* Alan Modra's little-endian strlen tail for 64-bit */
		/*
		 * 64-bit variant: expands to its argument unchanged.  The
		 * (mask - 1) & ~mask step that the 32-bit create_zero_mask() performs in C
		 * is done inside the 64-bit find_zero() below instead.
		 */
		#define create_zero_mask(mask) (mask)

		/*
		 * find_zero - number of whole bytes below the lowest set bit of @mask.
		 *
		 * @mask: value produced by create_zero_mask(), which in this 64-bit branch
		 *        is the identity, so @mask is the caller's value unmodified.
		 *
		 * The asm computes (mask - 1) & ~mask, i.e. a word with every bit below
		 * the lowest set bit of @mask turned on, and popcntd counts those bits.
		 * Shifting the count right by 3 converts bits to bytes.
		 *
		 * Returns that byte count (8 when @mask is 0, since all 64 bits are then
		 * counted).
		 */
		static inline unsigned long find_zero(unsigned long mask)
		{
			unsigned long leading_zero_bits;
			long trailing_zero_bit_mask;

			/*
			 * @mask uses the "b" (base register) constraint rather than "r":
			 * addi reads a literal 0 instead of the register when its RA
			 * operand is r0, so allowing r0 would turn "mask - 1" into the
			 * constant -1.  The temporary is early-clobber because it is
			 * written before @mask is read for the last time.
			 */
			asm ("addi %1,%2,-1\n\t"
			     "andc %1,%1,%2\n\t"
			     "popcntd %0,%1"
				: "=r" (leading_zero_bits), "=&r" (trailing_zero_bit_mask)
				: "b" (mask));

			return leading_zero_bits >> 3;
		}

		#else /* 32-bit case */

		/*
		* This is largely generic for little-endian machines, but the
		* optimal byte mask counting is probably going to be something
		* that is architecture-specific. If you have a reliably fast
		* bit count instruction, that might be better than the multiply
		* and shift, for example.
		*/

		/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */
		static inline long count_masked_bytes(long mask)
		{
		@@ -79,6 +85,17 @@ static inline long count_masked_bytes(long mask)
		return a & mask;
		}

		/*
		 * create_zero_mask - turn a marker word into a mask of the bytes below
		 * the lowest marker.
		 *
		 * @bits: word whose lowest set bit is the position of interest.
		 *
		 * ~bits & (bits - 1) keeps exactly the bits below the lowest set bit of
		 * @bits (every bit when @bits is 0).  Shifting right by 7 means that when
		 * the lowest set bit is the top bit of a byte, the result is 0xff in each
		 * byte below that byte and 0 elsewhere.
		 */
		static inline unsigned long create_zero_mask(unsigned long bits)
		{
			const unsigned long below_lowest_set = ~bits & (bits - 1);

			return below_lowest_set >> 7;
		}

		/*
		 * find_zero - 32-bit variant; forwards @mask to count_masked_bytes() and
		 * returns its result as an unsigned long.
		 *
		 * NOTE(review): presumably @mask is the output of create_zero_mask() and
		 * the result is a byte index; confirm against count_masked_bytes().
		 */
		static inline unsigned long find_zero(unsigned long mask)
		{
			const unsigned long byte_count = count_masked_bytes(mask);

			return byte_count;
		}

		#endif

		/* Return nonzero if it has a zero */
		@@ -94,19 +111,9 @@ static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits,
		return bits;
		}

		/*
		 * create_zero_mask - turn a marker word into a mask of the bytes below
		 * the lowest marker.
		 *
		 * @bits: word whose lowest set bit is the position of interest.
		 *
		 * ~bits & (bits - 1) keeps exactly the bits below the lowest set bit of
		 * @bits (every bit when @bits is 0).  Shifting right by 7 means that when
		 * the lowest set bit is the top bit of a byte, the result is 0xff in each
		 * byte below that byte and 0 elsewhere.
		 */
		static inline unsigned long create_zero_mask(unsigned long bits)
		{
			const unsigned long below_lowest_set = ~bits & (bits - 1);

			return below_lowest_set >> 7;
		}

		/*
		 * The mask we created is directly usable as a bytemask, so this macro
		 * expands to its argument unchanged.
		 */
		#define zero_bytemask(mask) (mask)

		/*
		 * find_zero - forwards @mask to count_masked_bytes() and returns its
		 * result as an unsigned long.
		 *
		 * NOTE(review): presumably @mask is the output of create_zero_mask() and
		 * the result is a byte index; confirm against count_masked_bytes().
		 */
		static inline unsigned long find_zero(unsigned long mask)
		{
			const unsigned long byte_count = count_masked_bytes(mask);

			return byte_count;
		}
		#endif

		#endif /* _ASM_WORD_AT_A_TIME_H */