am 7b682c47: Merge "Add Mips support to libpixelflinger" (697b6379) · Commits · e / os / android_system_core

libpixelflinger/Android.mk

+5 −0

Original line number	Original line	Diff line number	Diff line
	@@ -43,6 +43,11 @@ ifeq ($(TARGET_ARCH),arm)
	PIXELFLINGER_CFLAGS += -fstrict-aliasing -fomit-frame-pointer		PIXELFLINGER_CFLAGS += -fstrict-aliasing -fomit-frame-pointer
	endif		endif

			ifeq ($(TARGET_ARCH),mips)
			PIXELFLINGER_SRC_FILES += arch-mips/t32cb16blend.S
			PIXELFLINGER_CFLAGS += -fstrict-aliasing -fomit-frame-pointer
			endif

	LOCAL_SHARED_LIBRARIES := libcutils		LOCAL_SHARED_LIBRARIES := libcutils

	ifneq ($(TARGET_ARCH),arm)		ifneq ($(TARGET_ARCH),arm)

libpixelflinger/arch-mips/t32cb16blend.S

0 → 100644

+264 −0

Original line number	Original line	Diff line number	Diff line
			/* libs/pixelflinger/t32cb16blend.S
			**
			** Copyright 2010, The Android Open Source Project
			**
			** Licensed under the Apache License, Version 2.0 (the "License");
			** you may not use this file except in compliance with the License.
			** You may obtain a copy of the License at
			**
			** http://www.apache.org/licenses/LICENSE-2.0
			**
			** Unless required by applicable law or agreed to in writing, software
			** distributed under the License is distributed on an "AS IS" BASIS,
			** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			** See the License for the specific language governing permissions and
			** limitations under the License.
			*/

			/*
			* DBG prefixes the optional instrumentation lines in this file.
			* With DEBUG defined it expands to nothing, so a line that starts
			* with DBG is assembled as written. Otherwise it expands to a single
			* # character, the same character this file uses to start trailing
			* comments, which turns the whole line into a comment.
			*/
			#ifdef DEBUG
			#define DBG
			#else
			#define DBG #
			#endif

			/*
			* blend one of 2 16bpp RGB pixels held in dreg selected by shift
			* with the 32bpp ABGR pixel held in src and store the result in fb
			*
			* Assumes that the dreg data is little endian and that
			* the second pixel (shift==16) will be merged into
			* the fb result
			*
			* Uses $t0,$t6,$t7,$t8
			*/

			#if __mips==32 && __mips_isa_rev>=2
			.macro pixel dreg src fb shift
			/*
			* Variant selected when __mips==32 and __mips_isa_rev>=2: the
			* 5/6/5 fields are pulled out and written back with ext/ins.
			*
			* dreg  : register holding the 16bpp destination pixel(s)
			* src   : register holding the 32bpp source pixel
			* fb    : register that receives the blended 16bpp fields
			* shift : bit offset of the pixel inside dreg and fb
			*         (the invocations in this file pass 0 or 16)
			*
			* Clobbers $t0,$t6,$t7,$t8. The DBG lines additionally use $at
			* and update $v0/$v1.
			*
			* sA = s >> 24
			* f = 0x100 - (sA + (sA>>7))
			*
			* f is 0x100 when sA is 0 and 0 when sA is 0xff.
			*/
			/* DBG only: sample hardware register $2 into $at before the blend */
			DBG .set noat
			DBG rdhwr $at,$2
			DBG .set at

			/* $t7 = f, the factor applied to every destination channel */
			srl $t7,\src,24
			srl $t6,$t7,7
			addu $t7,$t6
			li $t6,0x100
			subu $t7,$t6,$t7

			/*
			* red: fb field = ((dR * f) >> 8) + src[7..3]
			* The green destination field is extracted into $t0 right
			* after the multiply is issued, ahead of its use further down.
			*/
			ext $t8,\dreg,\shift+6+5,5 # dst[\shift:15..11]
			mul $t6,$t8,$t7
			ext $t0,\dreg,\shift+5,6 # start green extraction dst[\shift:10..5]
			ext $t8,\src,3,5 # src[7..3]
			srl $t6,8
			addu $t8,$t6
			ins \fb,$t8,\shift+6+5,5 # dst[\shift:15..11]

			/*
			* green: ((dG * f) >> 8) + src[15..10]
			* The result stays in $t8 and is inserted into fb in the
			* blue section, after the blue multiply has been issued.
			*/
			mul $t8,$t0,$t7
			ext $t0,\dreg,\shift,5 # start blue extraction dst[\shift:4..0]
			ext $t6,\src,2+8,6 # src[15..10]
			srl $t8,8
			addu $t8,$t6

			/* blue: fb field = ((dB * f) >> 8) + src[23..19] */
			mul $t0,$t0,$t7
			ins \fb,$t8,\shift+5,6 # finish green insertion dst[\shift:10..5]
			ext $t6,\src,(3+8+8),5
			srl $t8,$t0,8
			addu $t8,$t6
			ins \fb,$t8,\shift,5

			/*
			* DBG only: $t8 = hardware register $2 now minus the value
			* sampled at entry; $v0 keeps the smallest such difference
			* seen so far and $v1 the largest.
			*/
			DBG .set noat
			DBG rdhwr $t8,$2
			DBG subu $t8,$at
			DBG sltu $at,$t8,$v0
			DBG movn $v0,$t8,$at
			DBG sgtu $at,$t8,$v1
			DBG movn $v1,$t8,$at
			DBG .set at
			.endm

			#else

			.macro pixel dreg src fb shift
			/*
			* Fallback variant used when the MIPS32R2 test above fails:
			* fields are isolated with srl/and and merged with sll/or.
			*
			* dreg  : register holding the 16bpp destination pixel(s)
			* src   : register holding the 32bpp source pixel
			* fb    : register that receives the blended 16bpp fields
			* shift : bit offset of the pixel inside dreg and fb
			*         (the invocations in this file pass 0 or 16)
			*
			* With shift==0 the red step overwrites fb; every other step
			* ORs its field into fb, so a shift!=0 invocation adds to what
			* fb already holds.
			*
			* Clobbers $t6,$t7,$t8. The DBG lines additionally use $at and
			* update $v0/$v1.
			*
			* sA = s >> 24
			* f = 0x100 - (sA + (sA>>7))
			*/
			/*
			* DBG only: sample hardware register $2 into $at. The assembler
			* is switched to mips32r2 for the rdhwr and the previous
			* settings are restored by .set pop.
			*/
			DBG .set push
			DBG .set noat
			DBG .set mips32r2
			DBG rdhwr $at,$2
			DBG .set pop

			/* $t7 = f, the factor applied to every destination channel */
			srl $t7,\src,24
			srl $t6,$t7,7
			addu $t7,$t6
			li $t6,0x100
			subu $t7,$t6,$t7

			/*
			* red
			* dR = (d >> (6 + 5)) & 0x1f;
			* dR = (f*dR)>>8
			* sR = (s >> ( 3)) & 0x1f;
			* sR += dR
			* fb |= sR << 11
			*/
			srl $t8,\dreg,\shift+6+5
			/* the mask is only emitted for shift==0 */
			.if \shift==0
			and $t8,0x1f
			.endif
			mul $t8,$t8,$t7
			srl $t6,\src,3
			and $t6,0x1f
			srl $t8,8
			addu $t8,$t6
			/* shift!=0 merges into fb; shift==0 initialises fb with red */
			.if \shift!=0
			sll $t8,\shift+11
			or \fb,$t8
			.else
			sll \fb,$t8,11
			.endif

			/*
			* green
			* dG = (d >> 5) & 0x3f
			* dG = (f*dG) >> 8
			* sG = (s >> ( 8+2))&0x3F;
			*/
			srl $t8,\dreg,\shift+5
			and $t8,0x3f
			mul $t8,$t8,$t7
			srl $t6,\src,8+2
			and $t6,0x3f
			srl $t8,8
			addu $t8,$t6
			sll $t8,\shift + 5
			or \fb,$t8

			/* blue: ((dB * f) >> 8) + ((s >> 19) & 0x1f), ORed in at bit shift */
			.if \shift!=0
			srl $t8,\dreg,\shift
			and $t8,0x1f
			.else
			and $t8,\dreg,0x1f
			.endif
			mul $t8,$t8,$t7
			srl $t6,\src,(8+8+3)
			and $t6,0x1f
			srl $t8,8
			addu $t8,$t6
			.if \shift!=0
			sll $t8,\shift
			.endif
			or \fb,$t8
			/*
			* DBG only: $t8 = hardware register $2 now minus the value
			* sampled at entry; $v0 keeps the smallest such difference
			* seen so far and $v1 the largest.
			*/
			DBG .set push
			DBG .set noat
			DBG .set mips32r2
			DBG rdhwr $t8,$2
			DBG subu $t8,$at
			DBG sltu $at,$t8,$v0
			DBG movn $v0,$t8,$at
			DBG sgtu $at,$t8,$v1
			DBG movn $v1,$t8,$at
			DBG .set pop
			.endm
			#endif

			.text
			.align

			/*
			* scanline_t32cb16blend_mips
			*
			* Blends a run of 32bpp source pixels onto 16bpp destination
			* pixels using the pixel macro.
			*
			* In:
			*   $a0 = destination pointer, advanced 2 bytes per pixel
			*   $a1 = source pointer, advanced 4 bytes per pixel
			*   $a2 = number of pixels
			*
			* Uses $t0,$t1,$t3,$t4,$t5 here and $t6,$t7,$t8 through the pixel
			* macro. When DBG lines are enabled, $v0/$v1 and $a0 are also
			* written before returning.
			*
			* Layout:
			*   1. if the destination is not 4-byte aligned, blend one pixel
			*   2. blend groups of four pixels, two 32-bit destination
			*      words per iteration
			*   3. blend the remaining 0..3 pixels one at a time
			*
			* A source pixel equal to zero leaves the destination untouched,
			* so zero pixels (or zero pairs in the unrolled loop) are skipped.
			*/
			.global scanline_t32cb16blend_mips
			.ent scanline_t32cb16blend_mips
			scanline_t32cb16blend_mips:
			DBG li $v0,0xffffffff
			DBG li $v1,0
			/* Align the destination if necessary */
			and $t0,$a0,3
			beqz $t0,aligned

			/* as long as there is at least one pixel */
			beqz $a2,done

			lw $t4,($a1)
			addu $a0,2
			addu $a1,4
			beqz $t4,1f
			lhu $t3,-2($a0)
			pixel $t3,$t4,$t1,0
			sh $t1,-2($a0)
			1: subu $a2,1

			aligned:
			/*
			* Check to see if it is worth unrolling the loop.
			* From here on $a2 holds (pixels remaining - 4).
			*/
			subu $a2,4
			bltz $a2,tail

			/* Process 4 pixels at a time */
			fourpixels:
			/* 1st pair of pixels */
			lw $t4,0($a1)
			lw $t5,4($a1)
			addu $a0,8
			addu $a1,16

			/* both are zero, skip this pair */
			or $t3,$t4,$t5
			beqz $t3,1f

			/* load the destination */
			lw $t3,-8($a0)

			pixel $t3,$t4,$t1,0
			pixel $t3,$t5,$t1,16
			sw $t1,-8($a0)

			1:
			/* 2nd pair of pixels */
			lw $t4,-8($a1)
			lw $t5,-4($a1)

			/* both are zero, skip this pair */
			or $t3,$t4,$t5
			beqz $t3,1f

			/* load the destination */
			lw $t3,-4($a0)

			pixel $t3,$t4,$t1,0
			pixel $t3,$t5,$t1,16
			sw $t1,-4($a0)

			/*
			* $a2 >= 0 means at least four pixels remain, so stay in the
			* unrolled loop also when exactly four are left.
			*/
			1: subu $a2,4
			bgez $a2,fourpixels

			tail:
			/* the pixel count underran, restore it now */
			addu $a2,4

			/* handle the last 0..3 pixels */
			beqz $a2,done
			onepixel:
			lw $t4,($a1)
			addu $a0,2
			addu $a1,4
			beqz $t4,1f
			lhu $t3,-2($a0)
			pixel $t3,$t4,$t1,0
			sh $t1,-2($a0)
			1: subu $a2,1
			bnez $a2,onepixel
			done:
			/* DBG only: scale $v0/$v1 by hardware register $3 */
			DBG .set push
			DBG .set mips32r2
			DBG rdhwr $a0,$3
			DBG mul $v0,$a0
			DBG mul $v1,$a0
			DBG .set pop
			j $ra
			.end scanline_t32cb16blend_mips

libpixelflinger/scanline.cpp

+9 −1

Original line number	Original line	Diff line number	Diff line
	@@ -110,10 +110,14 @@ static void scanline_clear(context_t* c);
	static void rect_generic(context_t* c, size_t yc);		static void rect_generic(context_t* c, size_t yc);
	static void rect_memcpy(context_t* c, size_t yc);		static void rect_memcpy(context_t* c, size_t yc);

			#if defined( __arm__)
	extern "C" void scanline_t32cb16blend_arm(uint16_t, uint32_t, size_t);		extern "C" void scanline_t32cb16blend_arm(uint16_t, uint32_t, size_t);
	extern "C" void scanline_t32cb16_arm(uint16_t dst, uint32_t src, size_t ct);		extern "C" void scanline_t32cb16_arm(uint16_t dst, uint32_t src, size_t ct);
	extern "C" void scanline_col32cb16blend_neon(uint16_t dst, uint32_t col, size_t ct);		extern "C" void scanline_col32cb16blend_neon(uint16_t dst, uint32_t col, size_t ct);
	extern "C" void scanline_col32cb16blend_arm(uint16_t *dst, uint32_t col, size_t ct);		extern "C" void scanline_col32cb16blend_arm(uint16_t *dst, uint32_t col, size_t ct);
			#elif defined(__mips__)
			extern "C" void scanline_t32cb16blend_mips(uint16_t, uint32_t, size_t);
			#endif

	// ----------------------------------------------------------------------------		// ----------------------------------------------------------------------------

	@@ -2136,7 +2140,7 @@ last_one:

	void scanline_t32cb16blend(context_t* c)		void scanline_t32cb16blend(context_t* c)
	{		{
	#if ((ANDROID_CODEGEN >= ANDROID_CODEGEN_ASM) && defined(__arm__))		#if ((ANDROID_CODEGEN >= ANDROID_CODEGEN_ASM) && (defined(__arm__) \|\| defined(__mips)))
	int32_t x = c->iterators.xl;		int32_t x = c->iterators.xl;
	size_t ct = c->iterators.xr - x;		size_t ct = c->iterators.xr - x;
	int32_t y = c->iterators.y;		int32_t y = c->iterators.y;
	@@ -2148,7 +2152,11 @@ void scanline_t32cb16blend(context_t* c)
	const int32_t v = (c->state.texture[0].shade.it0>>16) + y;		const int32_t v = (c->state.texture[0].shade.it0>>16) + y;
	uint32_t src = reinterpret_cast<uint32_t>(tex->data)+(u+(tex->stride*v));		uint32_t src = reinterpret_cast<uint32_t>(tex->data)+(u+(tex->stride*v));

			#ifdef __arm__
	scanline_t32cb16blend_arm(dst, src, ct);		scanline_t32cb16blend_arm(dst, src, ct);
			#else
			scanline_t32cb16blend_mips(dst, src, ct);
			#endif
	#else		#else
	dst_iterator16 di(c);		dst_iterator16 di(c);
	horz_iterator32 hi(c);		horz_iterator32 hi(c);