Loading media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DecodeCoeffsToPair_s.S +27 −8 Original line number Diff line number Diff line Loading @@ -17,7 +17,8 @@ armVCM4P10_DecodeCoeffsToPair: SUB sp,sp,#0x40 LDR r10,[r0,#0] LDR r12,[r1,#0] LDR r6, =armVCM4P10_CAVLCCoeffTokenTables LDR r6, .LarmVCM4P10_CAVLCCoeffTokenTables P0: ADD r6, pc LDR r4,[sp,#0x68] LDRB r9,[r10,#2] LDRB r8,[r10,#1] Loading Loading @@ -131,7 +132,8 @@ L0x184: LSRS r8,r7,#1 RSBCS r8,r8,#0 STRH r8,[r2],#2 LDR r9, =armVCM4P10_SuffixToLevel LDR r9, .LarmVCM4P10_SuffixToLevel P1: ADD r9, pc LDRSB r8,[r9,r4] TEQ r4,#0 MOVEQ r4,#1 Loading @@ -148,8 +150,9 @@ L0x1b8: SUB lr,lr,#1 BEQ L0x2b0 TEQ r8,#4 LDREQ r6, =(armVCM4P10_CAVLCTotalZeros2x2Tables - 4) LDRNE r6, =(armVCM4P10_CAVLCTotalZeroTables - 4) LDREQ r6, .LarmVCM4P10_CAVLCTotalZeros2x2Tables LDRNE r6, .LarmVCM4P10_CAVLCTotalZeroTables P2: ADD r6, pc LDR r6,[r6,r5,LSL #2] LSLS r8,r11,r12 MOVS r7,#0x1e Loading @@ -175,7 +178,8 @@ L0x224: BIC r7,r8,#0xf000 CMP r7,#0x10 BGE L0x33c LDR r3, =(armVCM4P10_CAVLCRunBeforeTables - 4) LDR r3, .LarmVCM4P10_CAVLCRunBeforeTables P3: ADD r3, pc ADD r4,sp,#0x2c MOVS r1,r7 ADD lr,lr,r1 Loading Loading @@ -228,8 +232,9 @@ L0x2bc: LDR r3,[sp,#8] LDR r0,[r3,#0] TEQ r8,#4 LDREQ r6, =armVCM4P10_ZigZag_2x2 LDRNE r6, =armVCM4P10_ZigZag_4x4 LDREQ r6, .LarmVCM4P10_ZigZag_2x2 LDRNE r6, .LarmVCM4P10_ZigZag_4x4 P4: ADD r6, pc L0x2ec: LDRB r9,[r4],#1 LDRB r8,[r6,lr] Loading Loading @@ -268,5 +273,19 @@ L0x35c: POP {r4-r12,pc} .endfunc .end .LarmVCM4P10_CAVLCCoeffTokenTables: .word armVCM4P10_CAVLCCoeffTokenTables-(P0+8) .LarmVCM4P10_SuffixToLevel: .word armVCM4P10_SuffixToLevel-(P1+8) .LarmVCM4P10_CAVLCTotalZeros2x2Tables: .word (armVCM4P10_CAVLCTotalZeros2x2Tables - 4)-(P2+8) .LarmVCM4P10_CAVLCTotalZeroTables: .word (armVCM4P10_CAVLCTotalZeroTables - 4)-(P2+8) .LarmVCM4P10_CAVLCRunBeforeTables: .word (armVCM4P10_CAVLCRunBeforeTables - 4)-(P3+8) .LarmVCM4P10_ZigZag_2x2: .word armVCM4P10_ZigZag_2x2-(P4+8) .LarmVCM4P10_ZigZag_4x4: .word armVCM4P10_ZigZag_4x4-(P4+8) .end media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_Interpolate_Chroma_s.S +11 −12 Original line number Diff line number Diff line Loading @@ -9,20 +9,18 @@ .arm .fpu neon .section .rodata .text .align 4 armVCM4P10_WidthBranchTableMVIsNotZero: .word WidthIs2MVIsNotZero, WidthIs2MVIsNotZero .word WidthIs4MVIsNotZero, WidthIs4MVIsNotZero .word WidthIs8MVIsNotZero .word WidthIs2MVIsNotZero-(P0+8), WidthIs2MVIsNotZero-(P0+8) .word WidthIs4MVIsNotZero-(P0+8), WidthIs4MVIsNotZero-(P0+8) .word WidthIs8MVIsNotZero-(P0+8) armVCM4P10_WidthBranchTableMVIsZero: .word WidthIs2MVIsZero, WidthIs2MVIsZero .word WidthIs4MVIsZero, WidthIs4MVIsZero .word WidthIs8MVIsZero .text .word WidthIs2MVIsZero-(P0+8), WidthIs2MVIsZero-(P0+8) .word WidthIs4MVIsZero-(P0+8), WidthIs4MVIsZero-(P0+8) .word WidthIs8MVIsZero-(P0+8) .global armVCM4P10_Interpolate_Chroma .func armVCM4P10_Interpolate_Chroma Loading @@ -35,9 +33,9 @@ armVCM4P10_Interpolate_Chroma: RSB r9,r7,#8 CMN r6,r7 MOV r10,#1 LDREQ r11, =armVCM4P10_WidthBranchTableMVIsZero ADREQ r11, armVCM4P10_WidthBranchTableMVIsZero SUB lr,r1,r10 LDRNE r11, =armVCM4P10_WidthBranchTableMVIsNotZero ADRNE r11, armVCM4P10_WidthBranchTableMVIsNotZero VLD1.8 {d0},[r0],r10 SMULBB r12,r8,r9 SMULBB r9,r6,r9 Loading @@ -48,7 +46,8 @@ armVCM4P10_Interpolate_Chroma: VDUP.8 d13,r9 VDUP.8 d14,r8 VDUP.8 d15,r6 LDR pc,[r11,r4,LSL #1] LDR r11,[r11, r4, lsl #1] P0: ADD pc,r11 WidthIs8MVIsNotZero: VLD1.8 {d2},[r0],r10 Loading media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.S +13 −3 Original line number Diff line number Diff line Loading @@ -26,9 +26,12 @@ omxVCM4P10_DequantTransformResidualFromPairAndAdd: MOV r1,r4 BL armVCM4P10_UnpackBlock4x4 ;// LDR r1,[sp,#0x60] LDR r11, =armVCM4P10_QPModuloTable LDR r10, =armVCM4P10_QPDivTable LDR r2, =armVCM4P10_VMatrixU16 LDR r11, .LarmVCM4P10_QPModuloTable P0: ADD r11, pc LDR r10, .LarmVCM4P10_QPDivTable P1: ADD r10, pc LDR r2, .LarmVCM4P10_VMatrixU16 P2: ADD r2, pc LDRSB r12,[r11,r1] LDRSB lr,[r10,r1] LDR r10, =0x3020504 Loading Loading @@ -115,5 +118,12 @@ L0x130: POP {r4-r12,pc} .endfunc .LarmVCM4P10_QPModuloTable: .word armVCM4P10_QPModuloTable-(P0+8) .LarmVCM4P10_QPDivTable: .word armVCM4P10_QPDivTable-(P1+8) .LarmVCM4P10_VMatrixU16: .word armVCM4P10_VMatrixU16-(P2+8) .end media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_PredictIntraChroma_8x8_s.S +8 −8 Original line number Diff line number Diff line Loading @@ -8,31 +8,31 @@ .arm .fpu neon .section .rodata .text .align 4 armVCM4P10_pIndexTable8x8: .word OMX_VC_CHROMA_DC, OMX_VC_CHROMA_HOR .word OMX_VC_CHROMA_VERT, OMX_VC_CHROMA_PLANE .word OMX_VC_CHROMA_DC-(P0+8), OMX_VC_CHROMA_HOR-(P0+8) .word OMX_VC_CHROMA_VERT-(P0+8), OMX_VC_CHROMA_PLANE-(P0+8) armVCM4P10_MultiplierTableChroma8x8: .hword 3, 2, 1,4 .hword -3,-2,-1,0 .hword 1, 2, 3,4 .text .global omxVCM4P10_PredictIntraChroma_8x8 .func omxVCM4P10_PredictIntraChroma_8x8 omxVCM4P10_PredictIntraChroma_8x8: PUSH {r4-r10,lr} VPUSH {d8-d15} LDR r8, =armVCM4P10_pIndexTable8x8 ADR r8, armVCM4P10_pIndexTable8x8 LDR r6,[sp,#0x68] LDR r4,[sp,#0x60] LDR r5,[sp,#0x64] LDR r7,[sp,#0x6c] LDR pc,[r8,r6,LSL #2] LDR r8,[r8,r6,LSL #2] P0: ADD pc,r8 OMX_VC_CHROMA_DC: TST r7,#2 BEQ L0xe8 Loading Loading @@ -151,7 +151,7 @@ OMX_VC_CHROMA_PLANE: VSUBL.U8 q7,d3,d2 VSHR.U64 d3,d3,#8 VSUBL.U8 q6,d3,d1 LDR r2, =armVCM4P10_MultiplierTableChroma8x8 ADR r2, armVCM4P10_MultiplierTableChroma8x8 VSHL.I64 d4,d4,#16 VEXT.8 d9,d4,d6,#2 VLD1.16 {d10},[r2]! Loading media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_PredictIntra_16x16_s.S +7 −8 Original line number Diff line number Diff line Loading @@ -9,7 +9,7 @@ .arm .fpu neon .section .rodata .text .align 4 ;//------------------------------------------------------- ;// This table for implementing switch case of C in asm by Loading @@ -17,9 +17,8 @@ ;//------------------------------------------------------- armVCM4P10_pIndexTable16x16: .word OMX_VC_16X16_VERT, OMX_VC_16X16_HOR .word OMX_VC_16X16_DC, OMX_VC_16X16_PLANE .word OMX_VC_16X16_VERT-(P0+8), OMX_VC_16X16_HOR-(P0+8) .word OMX_VC_16X16_DC-(P0+8), OMX_VC_16X16_PLANE-(P0+8) armVCM4P10_MultiplierTable16x16: Loading @@ -27,20 +26,20 @@ armVCM4P10_MultiplierTable16x16: .hword 0, 1, 2, 3, 4, 5, 6, 7 .hword 8, 9, 10, 11, 12, 13, 14, 15 .text .global omxVCM4P10_PredictIntra_16x16 .func omxVCM4P10_PredictIntra_16x16 omxVCM4P10_PredictIntra_16x16: PUSH {r4-r12,lr} VPUSH {d8-d15} LDR r9, =armVCM4P10_pIndexTable16x16 ADR r9, armVCM4P10_pIndexTable16x16 LDR r6,[sp,#0x70] LDR r4,[sp,#0x68] LDR r5,[sp,#0x6c] LDR r7,[sp,#0x74] MOV r12,#0x10 LDR pc,[r9,r6,LSL #2] LDR r9,[r9,r6,LSL #2] P0: ADD pc,r9 OMX_VC_16X16_VERT: VLD1.8 {d0,d1},[r1] ADD r8,r3,r5 Loading Loading @@ -162,7 +161,7 @@ L0x198: VPOP {d8-d15} POP {r4-r12,pc} OMX_VC_16X16_PLANE: LDR r9, =armVCM4P10_MultiplierTable16x16 ADR r9, armVCM4P10_MultiplierTable16x16 VLD1.8 {d0,d1},[r1] VLD1.8 {d4[0]},[r2] ADD r8,r0,r4 Loading Loading
media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DecodeCoeffsToPair_s.S +27 −8 Original line number Diff line number Diff line Loading @@ -17,7 +17,8 @@ armVCM4P10_DecodeCoeffsToPair: SUB sp,sp,#0x40 LDR r10,[r0,#0] LDR r12,[r1,#0] LDR r6, =armVCM4P10_CAVLCCoeffTokenTables LDR r6, .LarmVCM4P10_CAVLCCoeffTokenTables P0: ADD r6, pc LDR r4,[sp,#0x68] LDRB r9,[r10,#2] LDRB r8,[r10,#1] Loading Loading @@ -131,7 +132,8 @@ L0x184: LSRS r8,r7,#1 RSBCS r8,r8,#0 STRH r8,[r2],#2 LDR r9, =armVCM4P10_SuffixToLevel LDR r9, .LarmVCM4P10_SuffixToLevel P1: ADD r9, pc LDRSB r8,[r9,r4] TEQ r4,#0 MOVEQ r4,#1 Loading @@ -148,8 +150,9 @@ L0x1b8: SUB lr,lr,#1 BEQ L0x2b0 TEQ r8,#4 LDREQ r6, =(armVCM4P10_CAVLCTotalZeros2x2Tables - 4) LDRNE r6, =(armVCM4P10_CAVLCTotalZeroTables - 4) LDREQ r6, .LarmVCM4P10_CAVLCTotalZeros2x2Tables LDRNE r6, .LarmVCM4P10_CAVLCTotalZeroTables P2: ADD r6, pc LDR r6,[r6,r5,LSL #2] LSLS r8,r11,r12 MOVS r7,#0x1e Loading @@ -175,7 +178,8 @@ L0x224: BIC r7,r8,#0xf000 CMP r7,#0x10 BGE L0x33c LDR r3, =(armVCM4P10_CAVLCRunBeforeTables - 4) LDR r3, .LarmVCM4P10_CAVLCRunBeforeTables P3: ADD r3, pc ADD r4,sp,#0x2c MOVS r1,r7 ADD lr,lr,r1 Loading Loading @@ -228,8 +232,9 @@ L0x2bc: LDR r3,[sp,#8] LDR r0,[r3,#0] TEQ r8,#4 LDREQ r6, =armVCM4P10_ZigZag_2x2 LDRNE r6, =armVCM4P10_ZigZag_4x4 LDREQ r6, .LarmVCM4P10_ZigZag_2x2 LDRNE r6, .LarmVCM4P10_ZigZag_4x4 P4: ADD r6, pc L0x2ec: LDRB r9,[r4],#1 LDRB r8,[r6,lr] Loading Loading @@ -268,5 +273,19 @@ L0x35c: POP {r4-r12,pc} .endfunc .end .LarmVCM4P10_CAVLCCoeffTokenTables: .word armVCM4P10_CAVLCCoeffTokenTables-(P0+8) .LarmVCM4P10_SuffixToLevel: .word armVCM4P10_SuffixToLevel-(P1+8) .LarmVCM4P10_CAVLCTotalZeros2x2Tables: .word (armVCM4P10_CAVLCTotalZeros2x2Tables - 4)-(P2+8) .LarmVCM4P10_CAVLCTotalZeroTables: .word (armVCM4P10_CAVLCTotalZeroTables - 4)-(P2+8) .LarmVCM4P10_CAVLCRunBeforeTables: .word (armVCM4P10_CAVLCRunBeforeTables - 4)-(P3+8) .LarmVCM4P10_ZigZag_2x2: .word armVCM4P10_ZigZag_2x2-(P4+8) .LarmVCM4P10_ZigZag_4x4: .word armVCM4P10_ZigZag_4x4-(P4+8) .end
media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_Interpolate_Chroma_s.S +11 −12 Original line number Diff line number Diff line Loading @@ -9,20 +9,18 @@ .arm .fpu neon .section .rodata .text .align 4 armVCM4P10_WidthBranchTableMVIsNotZero: .word WidthIs2MVIsNotZero, WidthIs2MVIsNotZero .word WidthIs4MVIsNotZero, WidthIs4MVIsNotZero .word WidthIs8MVIsNotZero .word WidthIs2MVIsNotZero-(P0+8), WidthIs2MVIsNotZero-(P0+8) .word WidthIs4MVIsNotZero-(P0+8), WidthIs4MVIsNotZero-(P0+8) .word WidthIs8MVIsNotZero-(P0+8) armVCM4P10_WidthBranchTableMVIsZero: .word WidthIs2MVIsZero, WidthIs2MVIsZero .word WidthIs4MVIsZero, WidthIs4MVIsZero .word WidthIs8MVIsZero .text .word WidthIs2MVIsZero-(P0+8), WidthIs2MVIsZero-(P0+8) .word WidthIs4MVIsZero-(P0+8), WidthIs4MVIsZero-(P0+8) .word WidthIs8MVIsZero-(P0+8) .global armVCM4P10_Interpolate_Chroma .func armVCM4P10_Interpolate_Chroma Loading @@ -35,9 +33,9 @@ armVCM4P10_Interpolate_Chroma: RSB r9,r7,#8 CMN r6,r7 MOV r10,#1 LDREQ r11, =armVCM4P10_WidthBranchTableMVIsZero ADREQ r11, armVCM4P10_WidthBranchTableMVIsZero SUB lr,r1,r10 LDRNE r11, =armVCM4P10_WidthBranchTableMVIsNotZero ADRNE r11, armVCM4P10_WidthBranchTableMVIsNotZero VLD1.8 {d0},[r0],r10 SMULBB r12,r8,r9 SMULBB r9,r6,r9 Loading @@ -48,7 +46,8 @@ armVCM4P10_Interpolate_Chroma: VDUP.8 d13,r9 VDUP.8 d14,r8 VDUP.8 d15,r6 LDR pc,[r11,r4,LSL #1] LDR r11,[r11, r4, lsl #1] P0: ADD pc,r11 WidthIs8MVIsNotZero: VLD1.8 {d2},[r0],r10 Loading
media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.S +13 −3 Original line number Diff line number Diff line Loading @@ -26,9 +26,12 @@ omxVCM4P10_DequantTransformResidualFromPairAndAdd: MOV r1,r4 BL armVCM4P10_UnpackBlock4x4 ;// LDR r1,[sp,#0x60] LDR r11, =armVCM4P10_QPModuloTable LDR r10, =armVCM4P10_QPDivTable LDR r2, =armVCM4P10_VMatrixU16 LDR r11, .LarmVCM4P10_QPModuloTable P0: ADD r11, pc LDR r10, .LarmVCM4P10_QPDivTable P1: ADD r10, pc LDR r2, .LarmVCM4P10_VMatrixU16 P2: ADD r2, pc LDRSB r12,[r11,r1] LDRSB lr,[r10,r1] LDR r10, =0x3020504 Loading Loading @@ -115,5 +118,12 @@ L0x130: POP {r4-r12,pc} .endfunc .LarmVCM4P10_QPModuloTable: .word armVCM4P10_QPModuloTable-(P0+8) .LarmVCM4P10_QPDivTable: .word armVCM4P10_QPDivTable-(P1+8) .LarmVCM4P10_VMatrixU16: .word armVCM4P10_VMatrixU16-(P2+8) .end
media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_PredictIntraChroma_8x8_s.S +8 −8 Original line number Diff line number Diff line Loading @@ -8,31 +8,31 @@ .arm .fpu neon .section .rodata .text .align 4 armVCM4P10_pIndexTable8x8: .word OMX_VC_CHROMA_DC, OMX_VC_CHROMA_HOR .word OMX_VC_CHROMA_VERT, OMX_VC_CHROMA_PLANE .word OMX_VC_CHROMA_DC-(P0+8), OMX_VC_CHROMA_HOR-(P0+8) .word OMX_VC_CHROMA_VERT-(P0+8), OMX_VC_CHROMA_PLANE-(P0+8) armVCM4P10_MultiplierTableChroma8x8: .hword 3, 2, 1,4 .hword -3,-2,-1,0 .hword 1, 2, 3,4 .text .global omxVCM4P10_PredictIntraChroma_8x8 .func omxVCM4P10_PredictIntraChroma_8x8 omxVCM4P10_PredictIntraChroma_8x8: PUSH {r4-r10,lr} VPUSH {d8-d15} LDR r8, =armVCM4P10_pIndexTable8x8 ADR r8, armVCM4P10_pIndexTable8x8 LDR r6,[sp,#0x68] LDR r4,[sp,#0x60] LDR r5,[sp,#0x64] LDR r7,[sp,#0x6c] LDR pc,[r8,r6,LSL #2] LDR r8,[r8,r6,LSL #2] P0: ADD pc,r8 OMX_VC_CHROMA_DC: TST r7,#2 BEQ L0xe8 Loading Loading @@ -151,7 +151,7 @@ OMX_VC_CHROMA_PLANE: VSUBL.U8 q7,d3,d2 VSHR.U64 d3,d3,#8 VSUBL.U8 q6,d3,d1 LDR r2, =armVCM4P10_MultiplierTableChroma8x8 ADR r2, armVCM4P10_MultiplierTableChroma8x8 VSHL.I64 d4,d4,#16 VEXT.8 d9,d4,d6,#2 VLD1.16 {d10},[r2]! Loading
media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_PredictIntra_16x16_s.S +7 −8 Original line number Diff line number Diff line Loading @@ -9,7 +9,7 @@ .arm .fpu neon .section .rodata .text .align 4 ;//------------------------------------------------------- ;// This table for implementing switch case of C in asm by Loading @@ -17,9 +17,8 @@ ;//------------------------------------------------------- armVCM4P10_pIndexTable16x16: .word OMX_VC_16X16_VERT, OMX_VC_16X16_HOR .word OMX_VC_16X16_DC, OMX_VC_16X16_PLANE .word OMX_VC_16X16_VERT-(P0+8), OMX_VC_16X16_HOR-(P0+8) .word OMX_VC_16X16_DC-(P0+8), OMX_VC_16X16_PLANE-(P0+8) armVCM4P10_MultiplierTable16x16: Loading @@ -27,20 +26,20 @@ armVCM4P10_MultiplierTable16x16: .hword 0, 1, 2, 3, 4, 5, 6, 7 .hword 8, 9, 10, 11, 12, 13, 14, 15 .text .global omxVCM4P10_PredictIntra_16x16 .func omxVCM4P10_PredictIntra_16x16 omxVCM4P10_PredictIntra_16x16: PUSH {r4-r12,lr} VPUSH {d8-d15} LDR r9, =armVCM4P10_pIndexTable16x16 ADR r9, armVCM4P10_pIndexTable16x16 LDR r6,[sp,#0x70] LDR r4,[sp,#0x68] LDR r5,[sp,#0x6c] LDR r7,[sp,#0x74] MOV r12,#0x10 LDR pc,[r9,r6,LSL #2] LDR r9,[r9,r6,LSL #2] P0: ADD pc,r9 OMX_VC_16X16_VERT: VLD1.8 {d0,d1},[r1] ADD r8,r3,r5 Loading Loading @@ -162,7 +161,7 @@ L0x198: VPOP {d8-d15} POP {r4-r12,pc} OMX_VC_16X16_PLANE: LDR r9, =armVCM4P10_MultiplierTable16x16 ADR r9, armVCM4P10_MultiplierTable16x16 VLD1.8 {d0,d1},[r1] VLD1.8 {d4[0]},[r2] ADD r8,r0,r4 Loading