Loading media/libaudioprocessing/Android.bp +19 −0 Original line number Diff line number Diff line Loading @@ -19,6 +19,25 @@ cc_defaults { // uncomment to disable NEON on architectures that actually do support NEON, for benchmarking // "-DUSE_NEON=false", ], arch: { x86: { avx2: { cflags: [ "-mavx2", "-mfma", ], }, }, x86_64: { avx2: { cflags: [ "-mavx2", "-mfma", ], }, }, }, } cc_library_shared { Loading media/libaudioprocessing/AudioResamplerFirOps.h +8 −1 Original line number Diff line number Diff line Loading @@ -36,13 +36,20 @@ namespace android { #include <arm_neon.h> #endif #if defined(__SSSE3__) // Should be supported in x86 ABI for both 32 & 64-bit. #if defined(__AVX2__) // Should be supported in x86 ABI for both 32 & 64-bit. #define USE_AVX2 (true) // Inference AVX2/FMA Intrinsics #define USE_SSE (true) #include <immintrin.h> #elif defined(__SSSE3__) // Should be supported in x86 ABI for both 32 & 64-bit. #define USE_SSE (true) // Inference SSE Intrinsics #define USE_AVX2 (false) #include <tmmintrin.h> #else #define USE_SSE (false) #define USE_AVX2 (false) #endif template<typename T, typename U> struct is_same { Loading media/libaudioprocessing/AudioResamplerFirProcessSSE.h +34 −11 Original line number Diff line number Diff line Loading @@ -80,11 +80,16 @@ static inline void ProcessSSEIntrinsic(float* out, posCoef1 = _mm_sub_ps(posCoef1, posCoef); negCoef = _mm_sub_ps(negCoef, negCoef1); #if USE_AVX2 posCoef = _mm_fmadd_ps(posCoef1, interp, posCoef); negCoef = _mm_fmadd_ps(negCoef, interp, negCoef1); #else posCoef1 = _mm_mul_ps(posCoef1, interp); negCoef = _mm_mul_ps(negCoef, interp); posCoef = _mm_add_ps(posCoef1, posCoef); negCoef = _mm_add_ps(negCoef, negCoef1); #endif //USE_AVX2 } switch (CHANNELS) { case 1: { Loading @@ -94,11 +99,17 @@ static inline void ProcessSSEIntrinsic(float* out, sN += 4; posSamp = _mm_shuffle_ps(posSamp, posSamp, 0x1B); #if USE_AVX2 accL = _mm_fmadd_ps(posSamp, posCoef, accL); accL = _mm_fmadd_ps(negSamp, negCoef, accL); 
#else posSamp = _mm_mul_ps(posSamp, posCoef); negSamp = _mm_mul_ps(negSamp, negCoef); accL = _mm_add_ps(accL, posSamp); accL = _mm_add_ps(accL, negSamp); #endif } break; case 2: { __m128 posSamp0 = _mm_loadu_ps(sP); Loading @@ -114,6 +125,12 @@ static inline void ProcessSSEIntrinsic(float* out, __m128 negSampL = _mm_shuffle_ps(negSamp0, negSamp1, 0x88); __m128 negSampR = _mm_shuffle_ps(negSamp0, negSamp1, 0xDD); #if USE_AVX2 accL = _mm_fmadd_ps(posSampL, posCoef, accL); accR = _mm_fmadd_ps(posSampR, posCoef, accR); accL = _mm_fmadd_ps(negSampL, negCoef, accL); accR = _mm_fmadd_ps(negSampR, negCoef, accR); #else posSampL = _mm_mul_ps(posSampL, posCoef); posSampR = _mm_mul_ps(posSampR, posCoef); negSampL = _mm_mul_ps(negSampL, negCoef); Loading @@ -123,6 +140,8 @@ static inline void ProcessSSEIntrinsic(float* out, accR = _mm_add_ps(accR, posSampR); accL = _mm_add_ps(accL, negSampL); accR = _mm_add_ps(accR, negSampR); #endif } break; } } while (count -= 4); Loading @@ -144,9 +163,13 @@ static inline void ProcessSSEIntrinsic(float* out, outAccum = _mm_hadd_ps(accL, accR); outAccum = _mm_hadd_ps(outAccum, outAccum); } #if USE_AVX2 outSamp = _mm_fmadd_ps(outAccum, vLR,outSamp); #else outAccum = _mm_mul_ps(outAccum, vLR); outSamp = _mm_add_ps(outSamp, outAccum); #endif _mm_storel_pi(reinterpret_cast<__m64*>(out), outSamp); } Loading Loading
media/libaudioprocessing/Android.bp +19 −0 Original line number Diff line number Diff line Loading @@ -19,6 +19,25 @@ cc_defaults { // uncomment to disable NEON on architectures that actually do support NEON, for benchmarking // "-DUSE_NEON=false", ], arch: { x86: { avx2: { cflags: [ "-mavx2", "-mfma", ], }, }, x86_64: { avx2: { cflags: [ "-mavx2", "-mfma", ], }, }, }, } cc_library_shared { Loading
media/libaudioprocessing/AudioResamplerFirOps.h +8 −1 Original line number Diff line number Diff line Loading @@ -36,13 +36,20 @@ namespace android { #include <arm_neon.h> #endif #if defined(__SSSE3__) // Should be supported in x86 ABI for both 32 & 64-bit. #if defined(__AVX2__) // Should be supported in x86 ABI for both 32 & 64-bit. #define USE_AVX2 (true) // Inference AVX2/FMA Intrinsics #define USE_SSE (true) #include <immintrin.h> #elif defined(__SSSE3__) // Should be supported in x86 ABI for both 32 & 64-bit. #define USE_SSE (true) // Inference SSE Intrinsics #define USE_AVX2 (false) #include <tmmintrin.h> #else #define USE_SSE (false) #define USE_AVX2 (false) #endif template<typename T, typename U> struct is_same { Loading
media/libaudioprocessing/AudioResamplerFirProcessSSE.h +34 −11 Original line number Diff line number Diff line Loading @@ -80,11 +80,16 @@ static inline void ProcessSSEIntrinsic(float* out, posCoef1 = _mm_sub_ps(posCoef1, posCoef); negCoef = _mm_sub_ps(negCoef, negCoef1); #if USE_AVX2 posCoef = _mm_fmadd_ps(posCoef1, interp, posCoef); negCoef = _mm_fmadd_ps(negCoef, interp, negCoef1); #else posCoef1 = _mm_mul_ps(posCoef1, interp); negCoef = _mm_mul_ps(negCoef, interp); posCoef = _mm_add_ps(posCoef1, posCoef); negCoef = _mm_add_ps(negCoef, negCoef1); #endif //USE_AVX2 } switch (CHANNELS) { case 1: { Loading @@ -94,11 +99,17 @@ static inline void ProcessSSEIntrinsic(float* out, sN += 4; posSamp = _mm_shuffle_ps(posSamp, posSamp, 0x1B); #if USE_AVX2 accL = _mm_fmadd_ps(posSamp, posCoef, accL); accL = _mm_fmadd_ps(negSamp, negCoef, accL); #else posSamp = _mm_mul_ps(posSamp, posCoef); negSamp = _mm_mul_ps(negSamp, negCoef); accL = _mm_add_ps(accL, posSamp); accL = _mm_add_ps(accL, negSamp); #endif } break; case 2: { __m128 posSamp0 = _mm_loadu_ps(sP); Loading @@ -114,6 +125,12 @@ static inline void ProcessSSEIntrinsic(float* out, __m128 negSampL = _mm_shuffle_ps(negSamp0, negSamp1, 0x88); __m128 negSampR = _mm_shuffle_ps(negSamp0, negSamp1, 0xDD); #if USE_AVX2 accL = _mm_fmadd_ps(posSampL, posCoef, accL); accR = _mm_fmadd_ps(posSampR, posCoef, accR); accL = _mm_fmadd_ps(negSampL, negCoef, accL); accR = _mm_fmadd_ps(negSampR, negCoef, accR); #else posSampL = _mm_mul_ps(posSampL, posCoef); posSampR = _mm_mul_ps(posSampR, posCoef); negSampL = _mm_mul_ps(negSampL, negCoef); Loading @@ -123,6 +140,8 @@ static inline void ProcessSSEIntrinsic(float* out, accR = _mm_add_ps(accR, posSampR); accL = _mm_add_ps(accL, negSampL); accR = _mm_add_ps(accR, negSampR); #endif } break; } } while (count -= 4); Loading @@ -144,9 +163,13 @@ static inline void ProcessSSEIntrinsic(float* out, outAccum = _mm_hadd_ps(accL, accR); outAccum = _mm_hadd_ps(outAccum, outAccum); } #if USE_AVX2 outSamp 
= _mm_fmadd_ps(outAccum, vLR,outSamp); #else outAccum = _mm_mul_ps(outAccum, vLR); outSamp = _mm_add_ps(outSamp, outAccum); #endif _mm_storel_pi(reinterpret_cast<__m64*>(out), outSamp); } Loading