Loading libpixelflinger/Android.mk +5 −0 Original line number Original line Diff line number Diff line Loading @@ -43,6 +43,11 @@ ifeq ($(TARGET_ARCH),arm) PIXELFLINGER_CFLAGS += -fstrict-aliasing -fomit-frame-pointer PIXELFLINGER_CFLAGS += -fstrict-aliasing -fomit-frame-pointer endif endif ifeq ($(TARGET_ARCH),mips) PIXELFLINGER_SRC_FILES += arch-mips/t32cb16blend.S PIXELFLINGER_CFLAGS += -fstrict-aliasing -fomit-frame-pointer endif LOCAL_SHARED_LIBRARIES := libcutils LOCAL_SHARED_LIBRARIES := libcutils ifneq ($(TARGET_ARCH),arm) ifneq ($(TARGET_ARCH),arm) Loading libpixelflinger/arch-mips/t32cb16blend.S 0 → 100644 +264 −0 Original line number Original line Diff line number Diff line /* libs/pixelflinger/t32cb16blend.S ** ** Copyright 2010, The Android Open Source Project ** ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. ** You may obtain a copy of the License at ** ** http://www.apache.org/licenses/LICENSE-2.0 ** ** Unless required by applicable law or agreed to in writing, software ** distributed under the License is distributed on an "AS IS" BASIS, ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ** See the License for the specific language governing permissions and ** limitations under the License. */ #ifdef DEBUG #define DBG #else #define DBG # #endif /* * blend one of 2 16bpp RGB pixels held in dreg selected by shift * with the 32bpp ABGR pixel held in src and store the result in fb * * Assumes that the dreg data is little endian and that * the the second pixel (shift==16) will be merged into * the fb result * * Uses $t0,$t6,$t7,$t8 */ #if __mips==32 && __mips_isa_rev>=2 .macro pixel dreg src fb shift /* * sA = s >> 24 * f = 0x100 - (sA + (sA>>7)) */ DBG .set noat DBG rdhwr $at,$2 DBG .set at srl $t7,\src,24 srl $t6,$t7,7 addu $t7,$t6 li $t6,0x100 subu $t7,$t6,$t7 /* red */ ext $t8,\dreg,\shift+6+5,5 # dst[\shift:15..11] mul $t6,$t8,$t7 ext $t0,\dreg,\shift+5,6 # start green extraction dst[\shift:10..5] ext $t8,\src,3,5 # src[7..3] srl $t6,8 addu $t8,$t6 ins \fb,$t8,\shift+6+5,5 # dst[\shift:15..11] /* green */ mul $t8,$t0,$t7 ext $t0,\dreg,\shift,5 # start blue extraction dst[\shift:4..0] ext $t6,\src,2+8,6 # src[15..10] srl $t8,8 addu $t8,$t6 /* blue */ mul $t0,$t0,$t7 ins \fb,$t8,\shift+5,6 # finish green insertion dst[\shift:10..5] ext $t6,\src,(3+8+8),5 srl $t8,$t0,8 addu $t8,$t6 ins \fb,$t8,\shift,5 DBG .set noat DBG rdhwr $t8,$2 DBG subu $t8,$at DBG sltu $at,$t8,$v0 DBG movn $v0,$t8,$at DBG sgtu $at,$t8,$v1 DBG movn $v1,$t8,$at DBG .set at .endm #else .macro pixel dreg src fb shift /* * sA = s >> 24 * f = 0x100 - (sA + (sA>>7)) */ DBG .set push DBG .set noat DBG .set mips32r2 DBG rdhwr $at,$2 DBG .set pop srl $t7,\src,24 srl $t6,$t7,7 addu $t7,$t6 li $t6,0x100 subu $t7,$t6,$t7 /* * red * dR = (d >> (6 + 5)) & 0x1f; * dR = (f*dR)>>8 * sR = (s >> ( 3)) & 0x1f; * sR += dR * fb |= sR << 11 */ srl $t8,\dreg,\shift+6+5 .if \shift==0 and $t8,0x1f .endif mul $t8,$t8,$t7 srl $t6,\src,3 and $t6,0x1f srl $t8,8 addu $t8,$t6 .if \shift!=0 sll $t8,\shift+11 or \fb,$t8 .else sll \fb,$t8,11 .endif /* * green * dG = (d >> 5) & 0x3f * dG = (f*dG) >> 8 * sG = (s >> ( 8+2))&0x3F; */ srl $t8,\dreg,\shift+5 and $t8,0x3f mul $t8,$t8,$t7 srl $t6,\src,8+2 and $t6,0x3f srl $t8,8 addu $t8,$t6 sll $t8,\shift + 5 or \fb,$t8 /* blue */ .if \shift!=0 srl $t8,\dreg,\shift and $t8,0x1f .else and $t8,\dreg,0x1f .endif mul $t8,$t8,$t7 srl $t6,\src,(8+8+3) and $t6,0x1f srl $t8,8 addu $t8,$t6 .if \shift!=0 sll $t8,\shift .endif or \fb,$t8 DBG .set push DBG .set noat DBG .set mips32r2 DBG rdhwr $t8,$2 DBG subu $t8,$at DBG sltu $at,$t8,$v0 DBG movn $v0,$t8,$at DBG sgtu $at,$t8,$v1 DBG movn $v1,$t8,$at DBG .set pop .endm #endif .text .align .global scanline_t32cb16blend_mips .ent scanline_t32cb16blend_mips scanline_t32cb16blend_mips: DBG li $v0,0xffffffff DBG li $v1,0 /* Align the destination if necessary */ and $t0,$a0,3 beqz $t0,aligned /* as long as there is at least one pixel */ beqz $a2,done lw $t4,($a1) addu $a0,2 addu $a1,4 beqz $t4,1f lhu $t3,-2($a0) pixel $t3,$t4,$t1,0 sh $t1,-2($a0) 1: subu $a2,1 aligned: /* Check to see if its worth unrolling the loop */ subu $a2,4 bltz $a2,tail /* Process 4 pixels at a time */ fourpixels: /* 1st pair of pixels */ lw $t4,0($a1) lw $t5,4($a1) addu $a0,8 addu $a1,16 /* both are zero, skip this pair */ or $t3,$t4,$t5 beqz $t3,1f /* load the destination */ lw $t3,-8($a0) pixel $t3,$t4,$t1,0 pixel $t3,$t5,$t1,16 sw $t1,-8($a0) 1: /* 2nd pair of pixels */ lw $t4,-8($a1) lw $t5,-4($a1) /* both are zero, skip this pair */ or $t3,$t4,$t5 beqz $t3,1f /* load the destination */ lw $t3,-4($a0) pixel $t3,$t4,$t1,0 pixel $t3,$t5,$t1,16 sw $t1,-4($a0) 1: subu $a2,4 bgtz $a2,fourpixels tail: /* the pixel count underran, restore it now */ addu $a2,4 /* handle the last 0..3 pixels */ beqz $a2,done onepixel: lw $t4,($a1) addu $a0,2 addu $a1,4 beqz $t4,1f lhu $t3,-2($a0) pixel $t3,$t4,$t1,0 sh $t1,-2($a0) 1: subu $a2,1 bnez $a2,onepixel done: DBG .set push DBG .set mips32r2 DBG rdhwr $a0,$3 DBG mul $v0,$a0 DBG mul $v1,$a0 DBG .set pop j $ra .end scanline_t32cb16blend_mips libpixelflinger/scanline.cpp +9 −1 Original line number Original line Diff line number Diff line Loading @@ -110,10 +110,14 @@ static void scanline_clear(context_t* c); static void rect_generic(context_t* c, size_t yc); static void rect_generic(context_t* c, size_t yc); static void rect_memcpy(context_t* c, size_t yc); static void rect_memcpy(context_t* c, size_t yc); #if defined( __arm__) extern "C" void scanline_t32cb16blend_arm(uint16_t*, uint32_t*, size_t); extern "C" void scanline_t32cb16blend_arm(uint16_t*, uint32_t*, size_t); extern "C" void scanline_t32cb16_arm(uint16_t *dst, uint32_t *src, size_t ct); extern "C" void scanline_t32cb16_arm(uint16_t *dst, uint32_t *src, size_t ct); extern "C" void scanline_col32cb16blend_neon(uint16_t *dst, uint32_t *col, size_t ct); extern "C" void scanline_col32cb16blend_neon(uint16_t *dst, uint32_t *col, size_t ct); extern "C" void scanline_col32cb16blend_arm(uint16_t *dst, uint32_t col, size_t ct); extern "C" void scanline_col32cb16blend_arm(uint16_t *dst, uint32_t col, size_t ct); #elif defined(__mips__) extern "C" void scanline_t32cb16blend_mips(uint16_t*, uint32_t*, size_t); #endif // ---------------------------------------------------------------------------- // ---------------------------------------------------------------------------- Loading Loading @@ -2136,7 +2140,7 @@ last_one: void scanline_t32cb16blend(context_t* c) void scanline_t32cb16blend(context_t* c) { { #if ((ANDROID_CODEGEN >= ANDROID_CODEGEN_ASM) && defined(__arm__)) #if ((ANDROID_CODEGEN >= ANDROID_CODEGEN_ASM) && (defined(__arm__) || defined(__mips))) int32_t x = c->iterators.xl; int32_t x = c->iterators.xl; size_t ct = c->iterators.xr - x; size_t ct = c->iterators.xr - x; int32_t y = c->iterators.y; int32_t y = c->iterators.y; Loading @@ -2148,7 +2152,11 @@ void scanline_t32cb16blend(context_t* c) const int32_t v = (c->state.texture[0].shade.it0>>16) + y; const int32_t v = (c->state.texture[0].shade.it0>>16) + y; uint32_t *src = reinterpret_cast<uint32_t*>(tex->data)+(u+(tex->stride*v)); uint32_t *src = reinterpret_cast<uint32_t*>(tex->data)+(u+(tex->stride*v)); #ifdef __arm__ scanline_t32cb16blend_arm(dst, src, ct); scanline_t32cb16blend_arm(dst, src, ct); #else scanline_t32cb16blend_mips(dst, src, ct); #endif #else #else dst_iterator16 di(c); dst_iterator16 di(c); horz_iterator32 hi(c); horz_iterator32 hi(c); Loading Loading
libpixelflinger/Android.mk +5 −0 Original line number Original line Diff line number Diff line Loading @@ -43,6 +43,11 @@ ifeq ($(TARGET_ARCH),arm) PIXELFLINGER_CFLAGS += -fstrict-aliasing -fomit-frame-pointer PIXELFLINGER_CFLAGS += -fstrict-aliasing -fomit-frame-pointer endif endif ifeq ($(TARGET_ARCH),mips) PIXELFLINGER_SRC_FILES += arch-mips/t32cb16blend.S PIXELFLINGER_CFLAGS += -fstrict-aliasing -fomit-frame-pointer endif LOCAL_SHARED_LIBRARIES := libcutils LOCAL_SHARED_LIBRARIES := libcutils ifneq ($(TARGET_ARCH),arm) ifneq ($(TARGET_ARCH),arm) Loading
libpixelflinger/arch-mips/t32cb16blend.S 0 → 100644 +264 −0 Original line number Original line Diff line number Diff line /* libs/pixelflinger/t32cb16blend.S ** ** Copyright 2010, The Android Open Source Project ** ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. ** You may obtain a copy of the License at ** ** http://www.apache.org/licenses/LICENSE-2.0 ** ** Unless required by applicable law or agreed to in writing, software ** distributed under the License is distributed on an "AS IS" BASIS, ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ** See the License for the specific language governing permissions and ** limitations under the License. */ #ifdef DEBUG #define DBG #else #define DBG # #endif /* * blend one of 2 16bpp RGB pixels held in dreg selected by shift * with the 32bpp ABGR pixel held in src and store the result in fb * * Assumes that the dreg data is little endian and that * the the second pixel (shift==16) will be merged into * the fb result * * Uses $t0,$t6,$t7,$t8 */ #if __mips==32 && __mips_isa_rev>=2 .macro pixel dreg src fb shift /* * sA = s >> 24 * f = 0x100 - (sA + (sA>>7)) */ DBG .set noat DBG rdhwr $at,$2 DBG .set at srl $t7,\src,24 srl $t6,$t7,7 addu $t7,$t6 li $t6,0x100 subu $t7,$t6,$t7 /* red */ ext $t8,\dreg,\shift+6+5,5 # dst[\shift:15..11] mul $t6,$t8,$t7 ext $t0,\dreg,\shift+5,6 # start green extraction dst[\shift:10..5] ext $t8,\src,3,5 # src[7..3] srl $t6,8 addu $t8,$t6 ins \fb,$t8,\shift+6+5,5 # dst[\shift:15..11] /* green */ mul $t8,$t0,$t7 ext $t0,\dreg,\shift,5 # start blue extraction dst[\shift:4..0] ext $t6,\src,2+8,6 # src[15..10] srl $t8,8 addu $t8,$t6 /* blue */ mul $t0,$t0,$t7 ins \fb,$t8,\shift+5,6 # finish green insertion dst[\shift:10..5] ext $t6,\src,(3+8+8),5 srl $t8,$t0,8 addu $t8,$t6 ins \fb,$t8,\shift,5 DBG .set noat DBG rdhwr $t8,$2 DBG subu $t8,$at DBG sltu $at,$t8,$v0 DBG movn $v0,$t8,$at DBG sgtu $at,$t8,$v1 DBG movn $v1,$t8,$at DBG .set at .endm #else .macro pixel dreg src fb shift /* * sA = s >> 24 * f = 0x100 - (sA + (sA>>7)) */ DBG .set push DBG .set noat DBG .set mips32r2 DBG rdhwr $at,$2 DBG .set pop srl $t7,\src,24 srl $t6,$t7,7 addu $t7,$t6 li $t6,0x100 subu $t7,$t6,$t7 /* * red * dR = (d >> (6 + 5)) & 0x1f; * dR = (f*dR)>>8 * sR = (s >> ( 3)) & 0x1f; * sR += dR * fb |= sR << 11 */ srl $t8,\dreg,\shift+6+5 .if \shift==0 and $t8,0x1f .endif mul $t8,$t8,$t7 srl $t6,\src,3 and $t6,0x1f srl $t8,8 addu $t8,$t6 .if \shift!=0 sll $t8,\shift+11 or \fb,$t8 .else sll \fb,$t8,11 .endif /* * green * dG = (d >> 5) & 0x3f * dG = (f*dG) >> 8 * sG = (s >> ( 8+2))&0x3F; */ srl $t8,\dreg,\shift+5 and $t8,0x3f mul $t8,$t8,$t7 srl $t6,\src,8+2 and $t6,0x3f srl $t8,8 addu $t8,$t6 sll $t8,\shift + 5 or \fb,$t8 /* blue */ .if \shift!=0 srl $t8,\dreg,\shift and $t8,0x1f .else and $t8,\dreg,0x1f .endif mul $t8,$t8,$t7 srl $t6,\src,(8+8+3) and $t6,0x1f srl $t8,8 addu $t8,$t6 .if \shift!=0 sll $t8,\shift .endif or \fb,$t8 DBG .set push DBG .set noat DBG .set mips32r2 DBG rdhwr $t8,$2 DBG subu $t8,$at DBG sltu $at,$t8,$v0 DBG movn $v0,$t8,$at DBG sgtu $at,$t8,$v1 DBG movn $v1,$t8,$at DBG .set pop .endm #endif .text .align .global scanline_t32cb16blend_mips .ent scanline_t32cb16blend_mips scanline_t32cb16blend_mips: DBG li $v0,0xffffffff DBG li $v1,0 /* Align the destination if necessary */ and $t0,$a0,3 beqz $t0,aligned /* as long as there is at least one pixel */ beqz $a2,done lw $t4,($a1) addu $a0,2 addu $a1,4 beqz $t4,1f lhu $t3,-2($a0) pixel $t3,$t4,$t1,0 sh $t1,-2($a0) 1: subu $a2,1 aligned: /* Check to see if its worth unrolling the loop */ subu $a2,4 bltz $a2,tail /* Process 4 pixels at a time */ fourpixels: /* 1st pair of pixels */ lw $t4,0($a1) lw $t5,4($a1) addu $a0,8 addu $a1,16 /* both are zero, skip this pair */ or $t3,$t4,$t5 beqz $t3,1f /* load the destination */ lw $t3,-8($a0) pixel $t3,$t4,$t1,0 pixel $t3,$t5,$t1,16 sw $t1,-8($a0) 1: /* 2nd pair of pixels */ lw $t4,-8($a1) lw $t5,-4($a1) /* both are zero, skip this pair */ or $t3,$t4,$t5 beqz $t3,1f /* load the destination */ lw $t3,-4($a0) pixel $t3,$t4,$t1,0 pixel $t3,$t5,$t1,16 sw $t1,-4($a0) 1: subu $a2,4 bgtz $a2,fourpixels tail: /* the pixel count underran, restore it now */ addu $a2,4 /* handle the last 0..3 pixels */ beqz $a2,done onepixel: lw $t4,($a1) addu $a0,2 addu $a1,4 beqz $t4,1f lhu $t3,-2($a0) pixel $t3,$t4,$t1,0 sh $t1,-2($a0) 1: subu $a2,1 bnez $a2,onepixel done: DBG .set push DBG .set mips32r2 DBG rdhwr $a0,$3 DBG mul $v0,$a0 DBG mul $v1,$a0 DBG .set pop j $ra .end scanline_t32cb16blend_mips
libpixelflinger/scanline.cpp +9 −1 Original line number Original line Diff line number Diff line Loading @@ -110,10 +110,14 @@ static void scanline_clear(context_t* c); static void rect_generic(context_t* c, size_t yc); static void rect_generic(context_t* c, size_t yc); static void rect_memcpy(context_t* c, size_t yc); static void rect_memcpy(context_t* c, size_t yc); #if defined( __arm__) extern "C" void scanline_t32cb16blend_arm(uint16_t*, uint32_t*, size_t); extern "C" void scanline_t32cb16blend_arm(uint16_t*, uint32_t*, size_t); extern "C" void scanline_t32cb16_arm(uint16_t *dst, uint32_t *src, size_t ct); extern "C" void scanline_t32cb16_arm(uint16_t *dst, uint32_t *src, size_t ct); extern "C" void scanline_col32cb16blend_neon(uint16_t *dst, uint32_t *col, size_t ct); extern "C" void scanline_col32cb16blend_neon(uint16_t *dst, uint32_t *col, size_t ct); extern "C" void scanline_col32cb16blend_arm(uint16_t *dst, uint32_t col, size_t ct); extern "C" void scanline_col32cb16blend_arm(uint16_t *dst, uint32_t col, size_t ct); #elif defined(__mips__) extern "C" void scanline_t32cb16blend_mips(uint16_t*, uint32_t*, size_t); #endif // ---------------------------------------------------------------------------- // ---------------------------------------------------------------------------- Loading Loading @@ -2136,7 +2140,7 @@ last_one: void scanline_t32cb16blend(context_t* c) void scanline_t32cb16blend(context_t* c) { { #if ((ANDROID_CODEGEN >= ANDROID_CODEGEN_ASM) && defined(__arm__)) #if ((ANDROID_CODEGEN >= ANDROID_CODEGEN_ASM) && (defined(__arm__) || defined(__mips))) int32_t x = c->iterators.xl; int32_t x = c->iterators.xl; size_t ct = c->iterators.xr - x; size_t ct = c->iterators.xr - x; int32_t y = c->iterators.y; int32_t y = c->iterators.y; Loading @@ -2148,7 +2152,11 @@ void scanline_t32cb16blend(context_t* c) const int32_t v = (c->state.texture[0].shade.it0>>16) + y; const int32_t v = (c->state.texture[0].shade.it0>>16) + y; uint32_t *src = reinterpret_cast<uint32_t*>(tex->data)+(u+(tex->stride*v)); uint32_t *src = reinterpret_cast<uint32_t*>(tex->data)+(u+(tex->stride*v)); #ifdef __arm__ scanline_t32cb16blend_arm(dst, src, ct); scanline_t32cb16blend_arm(dst, src, ct); #else scanline_t32cb16blend_mips(dst, src, ct); #endif #else #else dst_iterator16 di(c); dst_iterator16 di(c); horz_iterator32 hi(c); horz_iterator32 hi(c); Loading