Loading libpixelflinger/codeflinger/ARMAssemblerInterface.cpp +23 −0 Original line number Diff line number Diff line Loading @@ -61,6 +61,29 @@ uint32_t ARMAssemblerInterface::__immed8_pre(int32_t immed8, int W) ((W&1)<<21) | (((offset&0xF0)<<4)|(offset&0xF)); } // The following four functions are required for address manipulation // These are virtual functions, which can be overridden by architectures // that need special handling of address values (e.g. 64-bit arch) void ARMAssemblerInterface::ADDR_LDR(int cc, int Rd, int Rn, uint32_t offset) { LDR(cc, Rd, Rn, offset); } void ARMAssemblerInterface::ADDR_STR(int cc, int Rd, int Rn, uint32_t offset) { STR(cc, Rd, Rn, offset); } void ARMAssemblerInterface::ADDR_ADD(int cc, int s, int Rd, int Rn, uint32_t Op2) { dataProcessing(opADD, cc, s, Rd, Rn, Op2); } void ARMAssemblerInterface::ADDR_SUB(int cc, int s, int Rd, int Rn, uint32_t Op2) { dataProcessing(opSUB, cc, s, Rd, Rn, Op2); } }; // namespace android libpixelflinger/codeflinger/ARMAssemblerInterface.h +10 −0 Original line number Diff line number Diff line Loading @@ -331,6 +331,16 @@ public: inline void SMLAWT(int cc, int Rd, int Rm, int Rs, int Rn) { SMLAW(cc, yT, Rd, Rm, Rs, Rn); } // Address loading/storing/manipulation virtual void ADDR_LDR(int cc, int Rd, int Rn, uint32_t offset = __immed12_pre(0)); virtual void ADDR_STR (int cc, int Rd, int Rn, uint32_t offset = __immed12_pre(0)); virtual void ADDR_ADD(int cc, int s, int Rd, int Rn, uint32_t Op2); virtual void ADDR_SUB(int cc, int s, int Rd, int Rn, uint32_t Op2); }; }; // namespace android Loading libpixelflinger/codeflinger/ARMAssemblerProxy.cpp +13 −0 Original line number Diff line number Diff line Loading @@ -294,5 +294,18 @@ void ARMAssemblerProxy::UBFX(int cc, int Rd, int Rn, int lsb, int width) { mTarget->UBFX(cc, Rd, Rn, lsb, width); } void ARMAssemblerProxy::ADDR_LDR(int cc, int Rd, int Rn, uint32_t offset) { mTarget->ADDR_LDR(cc, Rd, Rn, offset); } void ARMAssemblerProxy::ADDR_STR(int cc, int Rd, int Rn, uint32_t offset) { mTarget->ADDR_STR(cc, Rd, Rn, offset); } void ARMAssemblerProxy::ADDR_ADD(int cc, int s, int Rd, int Rn, uint32_t Op2){ mTarget->ADDR_ADD(cc, s, Rd, Rn, Op2); } void ARMAssemblerProxy::ADDR_SUB(int cc, int s, int Rd, int Rn, uint32_t Op2){ mTarget->ADDR_SUB(cc, s, Rd, Rn, Op2); } }; // namespace android libpixelflinger/codeflinger/ARMAssemblerProxy.h +9 −0 Original line number Diff line number Diff line Loading @@ -146,6 +146,15 @@ public: virtual void UXTB16(int cc, int Rd, int Rm, int rotate); virtual void UBFX(int cc, int Rd, int Rn, int lsb, int width); virtual void ADDR_LDR(int cc, int Rd, int Rn, uint32_t offset = __immed12_pre(0)); virtual void ADDR_STR (int cc, int Rd, int Rn, uint32_t offset = __immed12_pre(0)); virtual void ADDR_ADD(int cc, int s, int Rd, int Rn, uint32_t Op2); virtual void ADDR_SUB(int cc, int s, int Rd, int Rn, uint32_t Op2); private: ARMAssemblerInterface* mTarget; }; Loading libpixelflinger/codeflinger/GGLAssembler.cpp +17 −17 Original line number Diff line number Diff line Loading @@ -263,7 +263,7 @@ int GGLAssembler::scanline_core(const needs_t& needs, context_t const* c) const int mask = GGL_DITHER_SIZE-1; parts.dither = reg_t(regs.obtain()); AND(AL, 0, parts.dither.reg, parts.count.reg, imm(mask)); ADD(AL, 0, parts.dither.reg, parts.dither.reg, ctxtReg); ADDR_ADD(AL, 0, parts.dither.reg, ctxtReg, parts.dither.reg); LDRB(AL, parts.dither.reg, parts.dither.reg, immed12_pre(GGL_OFFSETOF(ditherMatrix))); } Loading Loading @@ -336,7 +336,7 @@ int GGLAssembler::scanline_core(const needs_t& needs, context_t const* c) build_iterate_z(parts); build_iterate_f(parts); if (!mAllMasked) { ADD(AL, 0, parts.cbPtr.reg, parts.cbPtr.reg, imm(parts.cbPtr.size>>3)); ADDR_ADD(AL, 0, parts.cbPtr.reg, parts.cbPtr.reg, imm(parts.cbPtr.size>>3)); } SUB(AL, S, parts.count.reg, parts.count.reg, imm(1<<16)); B(PL, "fragment_loop"); Loading Loading @@ -392,7 +392,7 @@ void GGLAssembler::build_scanline_prolog( int Rs = scratches.obtain(); parts.cbPtr.setTo(obtainReg(), cb_bits); CONTEXT_LOAD(Rs, state.buffers.color.stride); CONTEXT_LOAD(parts.cbPtr.reg, state.buffers.color.data); CONTEXT_ADDR_LOAD(parts.cbPtr.reg, state.buffers.color.data); SMLABB(AL, Rs, Ry, Rs, Rx); // Rs = Rx + Ry*Rs base_offset(parts.cbPtr, parts.cbPtr, Rs); scratches.recycle(Rs); Loading Loading @@ -428,11 +428,11 @@ void GGLAssembler::build_scanline_prolog( int Rs = dzdx; int zbase = scratches.obtain(); CONTEXT_LOAD(Rs, state.buffers.depth.stride); CONTEXT_LOAD(zbase, state.buffers.depth.data); CONTEXT_ADDR_LOAD(zbase, state.buffers.depth.data); SMLABB(AL, Rs, Ry, Rs, Rx); ADD(AL, 0, Rs, Rs, reg_imm(parts.count.reg, LSR, 16)); ADD(AL, 0, zbase, zbase, reg_imm(Rs, LSL, 1)); CONTEXT_STORE(zbase, generated_vars.zbase); ADDR_ADD(AL, 0, zbase, zbase, reg_imm(Rs, LSL, 1)); CONTEXT_ADDR_STORE(zbase, generated_vars.zbase); } // init texture coordinates Loading @@ -445,8 +445,8 @@ void GGLAssembler::build_scanline_prolog( // init coverage factor application (anti-aliasing) if (mAA) { parts.covPtr.setTo(obtainReg(), 16); CONTEXT_LOAD(parts.covPtr.reg, state.buffers.coverage); ADD(AL, 0, parts.covPtr.reg, parts.covPtr.reg, reg_imm(Rx, LSL, 1)); CONTEXT_ADDR_LOAD(parts.covPtr.reg, state.buffers.coverage); ADDR_ADD(AL, 0, parts.covPtr.reg, parts.covPtr.reg, reg_imm(Rx, LSL, 1)); } } Loading Loading @@ -765,8 +765,8 @@ void GGLAssembler::build_depth_test( int depth = scratches.obtain(); int z = parts.z.reg; CONTEXT_LOAD(zbase, generated_vars.zbase); // stall SUB(AL, 0, zbase, zbase, reg_imm(parts.count.reg, LSR, 15)); CONTEXT_ADDR_LOAD(zbase, generated_vars.zbase); // stall ADDR_SUB(AL, 0, zbase, zbase, reg_imm(parts.count.reg, LSR, 15)); // above does zbase = zbase + ((count >> 16) << 1) if (mask & Z_TEST) { Loading Loading @@ -990,22 +990,22 @@ void GGLAssembler::base_offset( { switch (b.size) { case 32: ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 2)); ADDR_ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 2)); break; case 24: if (d.reg == b.reg) { ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 1)); ADD(AL, 0, d.reg, d.reg, o.reg); ADDR_ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 1)); ADDR_ADD(AL, 0, d.reg, d.reg, o.reg); } else { ADD(AL, 0, d.reg, o.reg, reg_imm(o.reg, LSL, 1)); ADD(AL, 0, d.reg, d.reg, b.reg); ADDR_ADD(AL, 0, d.reg, o.reg, reg_imm(o.reg, LSL, 1)); ADDR_ADD(AL, 0, d.reg, d.reg, b.reg); } break; case 16: ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 1)); ADDR_ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 1)); break; case 8: ADD(AL, 0, d.reg, b.reg, o.reg); ADDR_ADD(AL, 0, d.reg, b.reg, o.reg); break; } } Loading Loading
libpixelflinger/codeflinger/ARMAssemblerInterface.cpp +23 −0 Original line number Diff line number Diff line Loading @@ -61,6 +61,29 @@ uint32_t ARMAssemblerInterface::__immed8_pre(int32_t immed8, int W) ((W&1)<<21) | (((offset&0xF0)<<4)|(offset&0xF)); } // The following four functions are required for address manipulation // These are virtual functions, which can be overridden by architectures // that need special handling of address values (e.g. 64-bit arch) void ARMAssemblerInterface::ADDR_LDR(int cc, int Rd, int Rn, uint32_t offset) { LDR(cc, Rd, Rn, offset); } void ARMAssemblerInterface::ADDR_STR(int cc, int Rd, int Rn, uint32_t offset) { STR(cc, Rd, Rn, offset); } void ARMAssemblerInterface::ADDR_ADD(int cc, int s, int Rd, int Rn, uint32_t Op2) { dataProcessing(opADD, cc, s, Rd, Rn, Op2); } void ARMAssemblerInterface::ADDR_SUB(int cc, int s, int Rd, int Rn, uint32_t Op2) { dataProcessing(opSUB, cc, s, Rd, Rn, Op2); } }; // namespace android
libpixelflinger/codeflinger/ARMAssemblerInterface.h +10 −0 Original line number Diff line number Diff line Loading @@ -331,6 +331,16 @@ public: inline void SMLAWT(int cc, int Rd, int Rm, int Rs, int Rn) { SMLAW(cc, yT, Rd, Rm, Rs, Rn); } // Address loading/storing/manipulation virtual void ADDR_LDR(int cc, int Rd, int Rn, uint32_t offset = __immed12_pre(0)); virtual void ADDR_STR (int cc, int Rd, int Rn, uint32_t offset = __immed12_pre(0)); virtual void ADDR_ADD(int cc, int s, int Rd, int Rn, uint32_t Op2); virtual void ADDR_SUB(int cc, int s, int Rd, int Rn, uint32_t Op2); }; }; // namespace android Loading
libpixelflinger/codeflinger/ARMAssemblerProxy.cpp +13 −0 Original line number Diff line number Diff line Loading @@ -294,5 +294,18 @@ void ARMAssemblerProxy::UBFX(int cc, int Rd, int Rn, int lsb, int width) { mTarget->UBFX(cc, Rd, Rn, lsb, width); } void ARMAssemblerProxy::ADDR_LDR(int cc, int Rd, int Rn, uint32_t offset) { mTarget->ADDR_LDR(cc, Rd, Rn, offset); } void ARMAssemblerProxy::ADDR_STR(int cc, int Rd, int Rn, uint32_t offset) { mTarget->ADDR_STR(cc, Rd, Rn, offset); } void ARMAssemblerProxy::ADDR_ADD(int cc, int s, int Rd, int Rn, uint32_t Op2){ mTarget->ADDR_ADD(cc, s, Rd, Rn, Op2); } void ARMAssemblerProxy::ADDR_SUB(int cc, int s, int Rd, int Rn, uint32_t Op2){ mTarget->ADDR_SUB(cc, s, Rd, Rn, Op2); } }; // namespace android
libpixelflinger/codeflinger/ARMAssemblerProxy.h +9 −0 Original line number Diff line number Diff line Loading @@ -146,6 +146,15 @@ public: virtual void UXTB16(int cc, int Rd, int Rm, int rotate); virtual void UBFX(int cc, int Rd, int Rn, int lsb, int width); virtual void ADDR_LDR(int cc, int Rd, int Rn, uint32_t offset = __immed12_pre(0)); virtual void ADDR_STR (int cc, int Rd, int Rn, uint32_t offset = __immed12_pre(0)); virtual void ADDR_ADD(int cc, int s, int Rd, int Rn, uint32_t Op2); virtual void ADDR_SUB(int cc, int s, int Rd, int Rn, uint32_t Op2); private: ARMAssemblerInterface* mTarget; }; Loading
libpixelflinger/codeflinger/GGLAssembler.cpp +17 −17 Original line number Diff line number Diff line Loading @@ -263,7 +263,7 @@ int GGLAssembler::scanline_core(const needs_t& needs, context_t const* c) const int mask = GGL_DITHER_SIZE-1; parts.dither = reg_t(regs.obtain()); AND(AL, 0, parts.dither.reg, parts.count.reg, imm(mask)); ADD(AL, 0, parts.dither.reg, parts.dither.reg, ctxtReg); ADDR_ADD(AL, 0, parts.dither.reg, ctxtReg, parts.dither.reg); LDRB(AL, parts.dither.reg, parts.dither.reg, immed12_pre(GGL_OFFSETOF(ditherMatrix))); } Loading Loading @@ -336,7 +336,7 @@ int GGLAssembler::scanline_core(const needs_t& needs, context_t const* c) build_iterate_z(parts); build_iterate_f(parts); if (!mAllMasked) { ADD(AL, 0, parts.cbPtr.reg, parts.cbPtr.reg, imm(parts.cbPtr.size>>3)); ADDR_ADD(AL, 0, parts.cbPtr.reg, parts.cbPtr.reg, imm(parts.cbPtr.size>>3)); } SUB(AL, S, parts.count.reg, parts.count.reg, imm(1<<16)); B(PL, "fragment_loop"); Loading Loading @@ -392,7 +392,7 @@ void GGLAssembler::build_scanline_prolog( int Rs = scratches.obtain(); parts.cbPtr.setTo(obtainReg(), cb_bits); CONTEXT_LOAD(Rs, state.buffers.color.stride); CONTEXT_LOAD(parts.cbPtr.reg, state.buffers.color.data); CONTEXT_ADDR_LOAD(parts.cbPtr.reg, state.buffers.color.data); SMLABB(AL, Rs, Ry, Rs, Rx); // Rs = Rx + Ry*Rs base_offset(parts.cbPtr, parts.cbPtr, Rs); scratches.recycle(Rs); Loading Loading @@ -428,11 +428,11 @@ void GGLAssembler::build_scanline_prolog( int Rs = dzdx; int zbase = scratches.obtain(); CONTEXT_LOAD(Rs, state.buffers.depth.stride); CONTEXT_LOAD(zbase, state.buffers.depth.data); CONTEXT_ADDR_LOAD(zbase, state.buffers.depth.data); SMLABB(AL, Rs, Ry, Rs, Rx); ADD(AL, 0, Rs, Rs, reg_imm(parts.count.reg, LSR, 16)); ADD(AL, 0, zbase, zbase, reg_imm(Rs, LSL, 1)); CONTEXT_STORE(zbase, generated_vars.zbase); ADDR_ADD(AL, 0, zbase, zbase, reg_imm(Rs, LSL, 1)); CONTEXT_ADDR_STORE(zbase, generated_vars.zbase); } // init texture coordinates Loading @@ -445,8 +445,8 @@ void GGLAssembler::build_scanline_prolog( // init coverage factor application (anti-aliasing) if (mAA) { parts.covPtr.setTo(obtainReg(), 16); CONTEXT_LOAD(parts.covPtr.reg, state.buffers.coverage); ADD(AL, 0, parts.covPtr.reg, parts.covPtr.reg, reg_imm(Rx, LSL, 1)); CONTEXT_ADDR_LOAD(parts.covPtr.reg, state.buffers.coverage); ADDR_ADD(AL, 0, parts.covPtr.reg, parts.covPtr.reg, reg_imm(Rx, LSL, 1)); } } Loading Loading @@ -765,8 +765,8 @@ void GGLAssembler::build_depth_test( int depth = scratches.obtain(); int z = parts.z.reg; CONTEXT_LOAD(zbase, generated_vars.zbase); // stall SUB(AL, 0, zbase, zbase, reg_imm(parts.count.reg, LSR, 15)); CONTEXT_ADDR_LOAD(zbase, generated_vars.zbase); // stall ADDR_SUB(AL, 0, zbase, zbase, reg_imm(parts.count.reg, LSR, 15)); // above does zbase = zbase + ((count >> 16) << 1) if (mask & Z_TEST) { Loading Loading @@ -990,22 +990,22 @@ void GGLAssembler::base_offset( { switch (b.size) { case 32: ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 2)); ADDR_ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 2)); break; case 24: if (d.reg == b.reg) { ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 1)); ADD(AL, 0, d.reg, d.reg, o.reg); ADDR_ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 1)); ADDR_ADD(AL, 0, d.reg, d.reg, o.reg); } else { ADD(AL, 0, d.reg, o.reg, reg_imm(o.reg, LSL, 1)); ADD(AL, 0, d.reg, d.reg, b.reg); ADDR_ADD(AL, 0, d.reg, o.reg, reg_imm(o.reg, LSL, 1)); ADDR_ADD(AL, 0, d.reg, d.reg, b.reg); } break; case 16: ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 1)); ADDR_ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 1)); break; case 8: ADD(AL, 0, d.reg, b.reg, o.reg); ADDR_ADD(AL, 0, d.reg, b.reg, o.reg); break; } } Loading