Loading services/audioflinger/AudioResamplerDyn.cpp +2 −1 Original line number Diff line number Diff line Loading @@ -29,9 +29,10 @@ #include <utils/Log.h> #include <audio_utils/primitives.h> #include "AudioResamplerFirOps.h" // USE_NEON and USE_INLINE_ASSEMBLY defined here #include "AudioResamplerFirOps.h" // USE_NEON, USE_SSE and USE_INLINE_ASSEMBLY defined here #include "AudioResamplerFirProcess.h" #include "AudioResamplerFirProcessNeon.h" #include "AudioResamplerFirProcessSSE.h" #include "AudioResamplerFirGen.h" // requires math.h #include "AudioResamplerDyn.h" Loading services/audioflinger/AudioResamplerFirOps.h +7 −0 Original line number Diff line number Diff line Loading @@ -36,6 +36,13 @@ namespace android { #include <arm_neon.h> #endif #if defined(__SSSE3__) // Should be supported in x86 ABI for both 32 & 64-bit. #define USE_SSE (true) #include <tmmintrin.h> #else #define USE_SSE (false) #endif template<typename T, typename U> struct is_same { Loading services/audioflinger/AudioResamplerFirProcessSSE.h 0 → 100644 +215 −0 Original line number Diff line number Diff line /* * Copyright (C) 2016 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef ANDROID_AUDIO_RESAMPLER_FIR_PROCESS_SSE_H #define ANDROID_AUDIO_RESAMPLER_FIR_PROCESS_SSE_H namespace android { // depends on AudioResamplerFirOps.h, AudioResamplerFirProcess.h #if USE_SSE #define TO_STRING2(x) #x #define TO_STRING(x) TO_STRING2(x) // uncomment to print GCC version, may be relevant for intrinsic optimizations /* #pragma message ("GCC version: " TO_STRING(__GNUC__) \ "." TO_STRING(__GNUC_MINOR__) \ "." TO_STRING(__GNUC_PATCHLEVEL__)) */ // // SSEx specializations are enabled for Process() and ProcessL() in AudioResamplerFirProcess.h // template <int CHANNELS, int STRIDE, bool FIXED> static inline void ProcessSSEIntrinsic(float* out, int count, const float* coefsP, const float* coefsN, const float* sP, const float* sN, const float* volumeLR, float lerpP, const float* coefsP1, const float* coefsN1) { ALOG_ASSERT(count > 0 && (count & 7) == 0); // multiple of 8 COMPILE_TIME_ASSERT_FUNCTION_SCOPE(CHANNELS == 1 || CHANNELS == 2); sP -= CHANNELS*(4-1); // adjust sP for a loop iteration of four __m128 interp; if (!FIXED) { interp = _mm_set1_ps(lerpP); } __m128 accL, accR; accL = _mm_setzero_ps(); if (CHANNELS == 2) { accR = _mm_setzero_ps(); } do { __m128 posCoef = _mm_load_ps(coefsP); __m128 negCoef = _mm_load_ps(coefsN); coefsP += 4; coefsN += 4; if (!FIXED) { // interpolate __m128 posCoef1 = _mm_load_ps(coefsP1); __m128 negCoef1 = _mm_load_ps(coefsN1); coefsP1 += 4; coefsN1 += 4; // Calculate the final coefficient for interpolation // posCoef = interp * (posCoef1 - posCoef) + posCoef // negCoef = interp * (negCoef - negCoef1) + negCoef1 posCoef1 = _mm_sub_ps(posCoef1, posCoef); negCoef = _mm_sub_ps(negCoef, negCoef1); posCoef1 = _mm_mul_ps(posCoef1, interp); negCoef = _mm_mul_ps(negCoef, interp); posCoef = _mm_add_ps(posCoef1, posCoef); negCoef = _mm_add_ps(negCoef, negCoef1); } switch (CHANNELS) { case 1: { __m128 posSamp = _mm_loadu_ps(sP); __m128 negSamp = _mm_loadu_ps(sN); sP -= 4; sN += 4; posSamp = _mm_shuffle_ps(posSamp, posSamp, 0x1B); posSamp = _mm_mul_ps(posSamp, posCoef); negSamp = _mm_mul_ps(negSamp, negCoef); accL = _mm_add_ps(accL, posSamp); accL = _mm_add_ps(accL, negSamp); } break; case 2: { __m128 posSamp0 = _mm_loadu_ps(sP); __m128 posSamp1 = _mm_loadu_ps(sP+4); __m128 negSamp0 = _mm_loadu_ps(sN); __m128 negSamp1 = _mm_loadu_ps(sN+4); sP -= 8; sN += 8; // deinterleave everything and reverse the positives __m128 posSampL = _mm_shuffle_ps(posSamp1, posSamp0, 0x22); __m128 posSampR = _mm_shuffle_ps(posSamp1, posSamp0, 0x77); __m128 negSampL = _mm_shuffle_ps(negSamp0, negSamp1, 0x88); __m128 negSampR = _mm_shuffle_ps(negSamp0, negSamp1, 0xDD); posSampL = _mm_mul_ps(posSampL, posCoef); posSampR = _mm_mul_ps(posSampR, posCoef); negSampL = _mm_mul_ps(negSampL, negCoef); negSampR = _mm_mul_ps(negSampR, negCoef); accL = _mm_add_ps(accL, posSampL); accR = _mm_add_ps(accR, posSampR); accL = _mm_add_ps(accL, negSampL); accR = _mm_add_ps(accR, negSampR); } break; } } while (count -= 4); // multiply by volume and save __m128 vLR = _mm_setzero_ps(); __m128 outSamp; vLR = _mm_loadl_pi(vLR, reinterpret_cast<const __m64*>(volumeLR)); outSamp = _mm_loadl_pi(vLR, reinterpret_cast<__m64*>(out)); // combine and funnel down accumulator __m128 outAccum = _mm_setzero_ps(); if (CHANNELS == 1) { // duplicate accL to both L and R outAccum = _mm_add_ps(accL, _mm_movehl_ps(accL, accL)); outAccum = _mm_add_ps(outAccum, _mm_shuffle_ps(outAccum, outAccum, 0x11)); } else if (CHANNELS == 2) { // accR contains R, fold in outAccum = _mm_hadd_ps(accL, accR); outAccum = _mm_hadd_ps(outAccum, outAccum); } outAccum = _mm_mul_ps(outAccum, vLR); outSamp = _mm_add_ps(outSamp, outAccum); _mm_storel_pi(reinterpret_cast<__m64*>(out), outSamp); } template<> inline void ProcessL<1, 16>(float* const out, int count, const float* coefsP, const float* coefsN, const float* sP, const float* sN, const float* const volumeLR) { ProcessSSEIntrinsic<1, 16, true>(out, count, coefsP, coefsN, sP, sN, volumeLR, 0 /*lerpP*/, NULL /*coefsP1*/, NULL /*coefsN1*/); } template<> inline void ProcessL<2, 16>(float* const out, int count, const float* coefsP, const float* coefsN, const float* sP, const float* sN, const float* const volumeLR) { ProcessSSEIntrinsic<2, 16, true>(out, count, coefsP, coefsN, sP, sN, volumeLR, 0 /*lerpP*/, NULL /*coefsP1*/, NULL /*coefsN1*/); } template<> inline void Process<1, 16>(float* const out, int count, const float* coefsP, const float* coefsN, const float* coefsP1, const float* coefsN1, const float* sP, const float* sN, float lerpP, const float* const volumeLR) { ProcessSSEIntrinsic<1, 16, false>(out, count, coefsP, coefsN, sP, sN, volumeLR, lerpP, coefsP1, coefsN1); } template<> inline void Process<2, 16>(float* const out, int count, const float* coefsP, const float* coefsN, const float* coefsP1, const float* coefsN1, const float* sP, const float* sN, float lerpP, const float* const volumeLR) { ProcessSSEIntrinsic<2, 16, false>(out, count, coefsP, coefsN, sP, sN, volumeLR, lerpP, coefsP1, coefsN1); } #endif //USE_SSE } // namespace android #endif /*ANDROID_AUDIO_RESAMPLER_FIR_PROCESS_SSE_H*/ Loading
services/audioflinger/AudioResamplerDyn.cpp +2 −1 Original line number Diff line number Diff line Loading @@ -29,9 +29,10 @@ #include <utils/Log.h> #include <audio_utils/primitives.h> #include "AudioResamplerFirOps.h" // USE_NEON and USE_INLINE_ASSEMBLY defined here #include "AudioResamplerFirOps.h" // USE_NEON, USE_SSE and USE_INLINE_ASSEMBLY defined here #include "AudioResamplerFirProcess.h" #include "AudioResamplerFirProcessNeon.h" #include "AudioResamplerFirProcessSSE.h" #include "AudioResamplerFirGen.h" // requires math.h #include "AudioResamplerDyn.h" Loading
services/audioflinger/AudioResamplerFirOps.h +7 −0 Original line number Diff line number Diff line Loading @@ -36,6 +36,13 @@ namespace android { #include <arm_neon.h> #endif #if defined(__SSSE3__) // Should be supported in x86 ABI for both 32 & 64-bit. #define USE_SSE (true) #include <tmmintrin.h> #else #define USE_SSE (false) #endif template<typename T, typename U> struct is_same { Loading
services/audioflinger/AudioResamplerFirProcessSSE.h 0 → 100644 +215 −0 Original line number Diff line number Diff line /* * Copyright (C) 2016 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef ANDROID_AUDIO_RESAMPLER_FIR_PROCESS_SSE_H #define ANDROID_AUDIO_RESAMPLER_FIR_PROCESS_SSE_H namespace android { // depends on AudioResamplerFirOps.h, AudioResamplerFirProcess.h #if USE_SSE #define TO_STRING2(x) #x #define TO_STRING(x) TO_STRING2(x) // uncomment to print GCC version, may be relevant for intrinsic optimizations /* #pragma message ("GCC version: " TO_STRING(__GNUC__) \ "." TO_STRING(__GNUC_MINOR__) \ "." TO_STRING(__GNUC_PATCHLEVEL__)) */ // // SSEx specializations are enabled for Process() and ProcessL() in AudioResamplerFirProcess.h // template <int CHANNELS, int STRIDE, bool FIXED> static inline void ProcessSSEIntrinsic(float* out, int count, const float* coefsP, const float* coefsN, const float* sP, const float* sN, const float* volumeLR, float lerpP, const float* coefsP1, const float* coefsN1) { ALOG_ASSERT(count > 0 && (count & 7) == 0); // multiple of 8 COMPILE_TIME_ASSERT_FUNCTION_SCOPE(CHANNELS == 1 || CHANNELS == 2); sP -= CHANNELS*(4-1); // adjust sP for a loop iteration of four __m128 interp; if (!FIXED) { interp = _mm_set1_ps(lerpP); } __m128 accL, accR; accL = _mm_setzero_ps(); if (CHANNELS == 2) { accR = _mm_setzero_ps(); } do { __m128 posCoef = _mm_load_ps(coefsP); __m128 negCoef = _mm_load_ps(coefsN); coefsP += 4; coefsN += 4; if (!FIXED) { // interpolate __m128 posCoef1 = _mm_load_ps(coefsP1); __m128 negCoef1 = _mm_load_ps(coefsN1); coefsP1 += 4; coefsN1 += 4; // Calculate the final coefficient for interpolation // posCoef = interp * (posCoef1 - posCoef) + posCoef // negCoef = interp * (negCoef - negCoef1) + negCoef1 posCoef1 = _mm_sub_ps(posCoef1, posCoef); negCoef = _mm_sub_ps(negCoef, negCoef1); posCoef1 = _mm_mul_ps(posCoef1, interp); negCoef = _mm_mul_ps(negCoef, interp); posCoef = _mm_add_ps(posCoef1, posCoef); negCoef = _mm_add_ps(negCoef, negCoef1); } switch (CHANNELS) { case 1: { __m128 posSamp = _mm_loadu_ps(sP); __m128 negSamp = _mm_loadu_ps(sN); sP -= 4; sN += 4; posSamp = _mm_shuffle_ps(posSamp, posSamp, 0x1B); posSamp = _mm_mul_ps(posSamp, posCoef); negSamp = _mm_mul_ps(negSamp, negCoef); accL = _mm_add_ps(accL, posSamp); accL = _mm_add_ps(accL, negSamp); } break; case 2: { __m128 posSamp0 = _mm_loadu_ps(sP); __m128 posSamp1 = _mm_loadu_ps(sP+4); __m128 negSamp0 = _mm_loadu_ps(sN); __m128 negSamp1 = _mm_loadu_ps(sN+4); sP -= 8; sN += 8; // deinterleave everything and reverse the positives __m128 posSampL = _mm_shuffle_ps(posSamp1, posSamp0, 0x22); __m128 posSampR = _mm_shuffle_ps(posSamp1, posSamp0, 0x77); __m128 negSampL = _mm_shuffle_ps(negSamp0, negSamp1, 0x88); __m128 negSampR = _mm_shuffle_ps(negSamp0, negSamp1, 0xDD); posSampL = _mm_mul_ps(posSampL, posCoef); posSampR = _mm_mul_ps(posSampR, posCoef); negSampL = _mm_mul_ps(negSampL, negCoef); negSampR = _mm_mul_ps(negSampR, negCoef); accL = _mm_add_ps(accL, posSampL); accR = _mm_add_ps(accR, posSampR); accL = _mm_add_ps(accL, negSampL); accR = _mm_add_ps(accR, negSampR); } break; } } while (count -= 4); // multiply by volume and save __m128 vLR = _mm_setzero_ps(); __m128 outSamp; vLR = _mm_loadl_pi(vLR, reinterpret_cast<const __m64*>(volumeLR)); outSamp = _mm_loadl_pi(vLR, reinterpret_cast<__m64*>(out)); // combine and funnel down accumulator __m128 outAccum = _mm_setzero_ps(); if (CHANNELS == 1) { // duplicate accL to both L and R outAccum = _mm_add_ps(accL, _mm_movehl_ps(accL, accL)); outAccum = _mm_add_ps(outAccum, _mm_shuffle_ps(outAccum, outAccum, 0x11)); } else if (CHANNELS == 2) { // accR contains R, fold in outAccum = _mm_hadd_ps(accL, accR); outAccum = _mm_hadd_ps(outAccum, outAccum); } outAccum = _mm_mul_ps(outAccum, vLR); outSamp = _mm_add_ps(outSamp, outAccum); _mm_storel_pi(reinterpret_cast<__m64*>(out), outSamp); } template<> inline void ProcessL<1, 16>(float* const out, int count, const float* coefsP, const float* coefsN, const float* sP, const float* sN, const float* const volumeLR) { ProcessSSEIntrinsic<1, 16, true>(out, count, coefsP, coefsN, sP, sN, volumeLR, 0 /*lerpP*/, NULL /*coefsP1*/, NULL /*coefsN1*/); } template<> inline void ProcessL<2, 16>(float* const out, int count, const float* coefsP, const float* coefsN, const float* sP, const float* sN, const float* const volumeLR) { ProcessSSEIntrinsic<2, 16, true>(out, count, coefsP, coefsN, sP, sN, volumeLR, 0 /*lerpP*/, NULL /*coefsP1*/, NULL /*coefsN1*/); } template<> inline void Process<1, 16>(float* const out, int count, const float* coefsP, const float* coefsN, const float* coefsP1, const float* coefsN1, const float* sP, const float* sN, float lerpP, const float* const volumeLR) { ProcessSSEIntrinsic<1, 16, false>(out, count, coefsP, coefsN, sP, sN, volumeLR, lerpP, coefsP1, coefsN1); } template<> inline void Process<2, 16>(float* const out, int count, const float* coefsP, const float* coefsN, const float* coefsP1, const float* coefsN1, const float* sP, const float* sN, float lerpP, const float* const volumeLR) { ProcessSSEIntrinsic<2, 16, false>(out, count, coefsP, coefsN, sP, sN, volumeLR, lerpP, coefsP1, coefsN1); } #endif //USE_SSE } // namespace android #endif /*ANDROID_AUDIO_RESAMPLER_FIR_PROCESS_SSE_H*/