Loading services/audioflinger/AudioResamplerFirProcess.h +146 −62 Original line number Diff line number Diff line Loading @@ -44,14 +44,14 @@ static inline void mac(float& l, float& r, TC coef, const float* samples) { l += *samples++ * coef; r += *samples++ * coef; r += *samples * coef; } template<typename TC> static inline void mac(float& l, TC coef, const float* samples) { l += *samples++ * coef; l += *samples * coef; } /* variant for output type TO = int32_t output samples */ Loading @@ -69,62 +69,48 @@ float volumeAdjust(float value, float volume) } /* * Calculates a single output frame (two samples). * * This function computes both the positive half FIR dot product and * the negative half FIR dot product, accumulates, and then applies the volume. * Helper template functions for loop unrolling accumulator operations. * * This is a locked phase filter (it does not compute the interpolation). * * Use fir() to compute the proper coefficient pointers for a polyphase * filter bank. * Unrolling the loops achieves about 2x gain. * Using a recursive template rather than an array of TO[] for the accumulator * values is an additional 10-20% gain. */ template <int CHANNELS, int STRIDE, typename TC, typename TI, typename TO> static inline void ProcessL(TO* const out, int count, const TC* coefsP, const TC* coefsN, const TI* sP, const TI* sN, const TO* const volumeLR) template<int CHANNELS, typename TO> class Accumulator : public Accumulator<CHANNELS-1, TO> // recursive { COMPILE_TIME_ASSERT_FUNCTION_SCOPE(CHANNELS >= 1 && CHANNELS <= 2) if (CHANNELS == 2) { TO l = 0; TO r = 0; do { mac(l, r, *coefsP++, sP); sP -= CHANNELS; mac(l, r, *coefsN++, sN); sN += CHANNELS; } while (--count > 0); out[0] += volumeAdjust(l, volumeLR[0]); out[1] += volumeAdjust(r, volumeLR[1]); } else { /* CHANNELS == 1 */ TO l = 0; do { mac(l, *coefsP++, sP); sP -= CHANNELS; mac(l, *coefsN++, sN); sN += CHANNELS; } while (--count > 0); out[0] += volumeAdjust(l, volumeLR[0]); out[1] += volumeAdjust(l, volumeLR[1]); public: inline void clear() { value = 0; Accumulator<CHANNELS-1, TO>::clear(); } template<typename TC, typename TI> inline void acc(TC coef, const TI*& data) { mac(value, coef, data++); Accumulator<CHANNELS-1, TO>::acc(coef, data); } inline void volume(TO*& out, TO gain) { *out++ = volumeAdjust(value, gain); Accumulator<CHANNELS-1, TO>::volume(out, gain); } TO value; // one per recursive inherited base class }; template<typename TO> class Accumulator<0, TO> { public: inline void clear() { } template<typename TC, typename TI> inline void acc(TC coef __unused, const TI*& data __unused) { } inline void volume(TO*& out __unused, TO gain __unused) { } }; /* * Calculates a single output frame (two samples) interpolating phase. * * This function computes both the positive half FIR dot product and * the negative half FIR dot product, accumulates, and then applies the volume. * * This is an interpolated phase filter. * * Use fir() to compute the proper coefficient pointers for a polyphase * filter bank. * Helper template functions for interpolating filter coefficients. */ template<typename TC, typename T> Loading Loading @@ -159,30 +145,98 @@ int32_t interpolate(int32_t coef_0, int32_t coef_1, uint32_t lerp) return mulAdd(static_cast<int16_t>(lerp), (coef_1-coef_0)<<1, coef_0); } template <int CHANNELS, int STRIDE, typename TC, typename TI, typename TO, typename TINTERP> /* class scope for passing in functions into templates */ struct InterpCompute { template<typename TC, typename TINTERP> static inline void Process(TO* const out, TC interpolatep(TC coef_0, TC coef_1, TINTERP lerp) { return interpolate(coef_0, coef_1, lerp); } template<typename TC, typename TINTERP> static inline TC interpolaten(TC coef_0, TC coef_1, TINTERP lerp) { return interpolate(coef_0, coef_1, lerp); } }; struct InterpNull { template<typename TC, typename TINTERP> static inline TC interpolatep(TC coef_0, TC coef_1 __unused, TINTERP lerp __unused) { return coef_0; } template<typename TC, typename TINTERP> static inline TC interpolaten(TC coef_0 __unused, TC coef_1, TINTERP lerp __unused) { return coef_1; } }; /* * Calculates a single output frame (two samples). * * The Process*() functions compute both the positive half FIR dot product and * the negative half FIR dot product, accumulates, and then applies the volume. * * Use fir() to compute the proper coefficient pointers for a polyphase * filter bank. * * ProcessBase() is the fundamental processing template function. * * ProcessL() calls ProcessBase() with TFUNC = InterpNull, for fixed/locked phase. * Process() calls ProcessBase() with TFUNC = InterpCompute, for interpolated phase. */ template <int CHANNELS, int STRIDE, typename TFUNC, typename TC, typename TI, typename TO, typename TINTERP> static inline void ProcessBase(TO* const out, int count, const TC* coefsP, const TC* coefsN, const TC* coefsP1 __unused, const TC* coefsN1 __unused, const TI* sP, const TI* sN, TINTERP lerpP, const TO* const volumeLR) { COMPILE_TIME_ASSERT_FUNCTION_SCOPE(CHANNELS >= 1 && CHANNELS <= 2) adjustLerp<TC, TINTERP>(lerpP); // coefficient type adjustment for interpolation COMPILE_TIME_ASSERT_FUNCTION_SCOPE(CHANNELS > 0) if (CHANNELS == 2) { if (CHANNELS > 2) { // TO accum[CHANNELS]; Accumulator<CHANNELS, TO> accum; // for (int j = 0; j < CHANNELS; ++j) accum[j] = 0; accum.clear(); for (size_t i = 0; i < count; ++i) { TC c = TFUNC::interpolatep(coefsP[0], coefsP[count], lerpP); // for (int j = 0; j < CHANNELS; ++j) mac(accum[j], c, sP + j); const TI *tmp_data = sP; // tmp_ptr seems to work better accum.acc(c, tmp_data); coefsP++; sP -= CHANNELS; c = TFUNC::interpolaten(coefsN[count], coefsN[0], lerpP); // for (int j = 0; j < CHANNELS; ++j) mac(accum[j], c, sN + j); tmp_data = sN; // tmp_ptr seems faster than directly using sN accum.acc(c, tmp_data); coefsN++; sN += CHANNELS; } // for (int j = 0; j < CHANNELS; ++j) out[j] += volumeAdjust(accum[j], volumeLR[0]); TO *tmp_out = out; // may remove if const out definition changes. accum.volume(tmp_out, volumeLR[0]); } else if (CHANNELS == 2) { TO l = 0; TO r = 0; for (size_t i = 0; i < count; ++i) { mac(l, r, interpolate(coefsP[0], coefsP[count], lerpP), sP); mac(l, r, TFUNC::interpolatep(coefsP[0], coefsP[count], lerpP), sP); coefsP++; sP -= CHANNELS; mac(l, r, interpolate(coefsN[count], coefsN[0], lerpP), sN); mac(l, r, TFUNC::interpolaten(coefsN[count], coefsN[0], lerpP), sN); coefsN++; sN += CHANNELS; } Loading @@ -191,10 +245,10 @@ void Process(TO* const out, } else { /* CHANNELS == 1 */ TO l = 0; for (size_t i = 0; i < count; ++i) { mac(l, interpolate(coefsP[0], coefsP[count], lerpP), sP); mac(l, TFUNC::interpolatep(coefsP[0], coefsP[count], lerpP), sP); coefsP++; sP -= CHANNELS; mac(l, interpolate(coefsN[count], coefsN[0], lerpP), sN); mac(l, TFUNC::interpolaten(coefsN[count], coefsN[0], lerpP), sN); coefsN++; sN += CHANNELS; } Loading @@ -203,6 +257,36 @@ void Process(TO* const out, } } template <int CHANNELS, int STRIDE, typename TC, typename TI, typename TO> static inline void ProcessL(TO* const out, int count, const TC* coefsP, const TC* coefsN, const TI* sP, const TI* sN, const TO* const volumeLR) { ProcessBase<CHANNELS, STRIDE, InterpNull>(out, count, coefsP, coefsN, sP, sN, 0, volumeLR); } template <int CHANNELS, int STRIDE, typename TC, typename TI, typename TO, typename TINTERP> static inline void Process(TO* const out, int count, const TC* coefsP, const TC* coefsN, const TC* coefsP1 __unused, const TC* coefsN1 __unused, const TI* sP, const TI* sN, TINTERP lerpP, const TO* const volumeLR) { adjustLerp<TC, TINTERP>(lerpP); // coefficient type adjustment for interpolations ProcessBase<CHANNELS, STRIDE, InterpCompute>(out, count, coefsP, coefsN, sP, sN, lerpP, volumeLR); } /* * Calculates a single output frame (two samples) from input sample pointer. * Loading Loading
services/audioflinger/AudioResamplerFirProcess.h +146 −62 Original line number Diff line number Diff line Loading @@ -44,14 +44,14 @@ static inline void mac(float& l, float& r, TC coef, const float* samples) { l += *samples++ * coef; r += *samples++ * coef; r += *samples * coef; } template<typename TC> static inline void mac(float& l, TC coef, const float* samples) { l += *samples++ * coef; l += *samples * coef; } /* variant for output type TO = int32_t output samples */ Loading @@ -69,62 +69,48 @@ float volumeAdjust(float value, float volume) } /* * Calculates a single output frame (two samples). * * This function computes both the positive half FIR dot product and * the negative half FIR dot product, accumulates, and then applies the volume. * Helper template functions for loop unrolling accumulator operations. * * This is a locked phase filter (it does not compute the interpolation). * * Use fir() to compute the proper coefficient pointers for a polyphase * filter bank. * Unrolling the loops achieves about 2x gain. * Using a recursive template rather than an array of TO[] for the accumulator * values is an additional 10-20% gain. */ template <int CHANNELS, int STRIDE, typename TC, typename TI, typename TO> static inline void ProcessL(TO* const out, int count, const TC* coefsP, const TC* coefsN, const TI* sP, const TI* sN, const TO* const volumeLR) template<int CHANNELS, typename TO> class Accumulator : public Accumulator<CHANNELS-1, TO> // recursive { COMPILE_TIME_ASSERT_FUNCTION_SCOPE(CHANNELS >= 1 && CHANNELS <= 2) if (CHANNELS == 2) { TO l = 0; TO r = 0; do { mac(l, r, *coefsP++, sP); sP -= CHANNELS; mac(l, r, *coefsN++, sN); sN += CHANNELS; } while (--count > 0); out[0] += volumeAdjust(l, volumeLR[0]); out[1] += volumeAdjust(r, volumeLR[1]); } else { /* CHANNELS == 1 */ TO l = 0; do { mac(l, *coefsP++, sP); sP -= CHANNELS; mac(l, *coefsN++, sN); sN += CHANNELS; } while (--count > 0); out[0] += volumeAdjust(l, volumeLR[0]); out[1] += volumeAdjust(l, volumeLR[1]); public: inline void clear() { value = 0; Accumulator<CHANNELS-1, TO>::clear(); } template<typename TC, typename TI> inline void acc(TC coef, const TI*& data) { mac(value, coef, data++); Accumulator<CHANNELS-1, TO>::acc(coef, data); } inline void volume(TO*& out, TO gain) { *out++ = volumeAdjust(value, gain); Accumulator<CHANNELS-1, TO>::volume(out, gain); } TO value; // one per recursive inherited base class }; template<typename TO> class Accumulator<0, TO> { public: inline void clear() { } template<typename TC, typename TI> inline void acc(TC coef __unused, const TI*& data __unused) { } inline void volume(TO*& out __unused, TO gain __unused) { } }; /* * Calculates a single output frame (two samples) interpolating phase. * * This function computes both the positive half FIR dot product and * the negative half FIR dot product, accumulates, and then applies the volume. * * This is an interpolated phase filter. * * Use fir() to compute the proper coefficient pointers for a polyphase * filter bank. * Helper template functions for interpolating filter coefficients. */ template<typename TC, typename T> Loading Loading @@ -159,30 +145,98 @@ int32_t interpolate(int32_t coef_0, int32_t coef_1, uint32_t lerp) return mulAdd(static_cast<int16_t>(lerp), (coef_1-coef_0)<<1, coef_0); } template <int CHANNELS, int STRIDE, typename TC, typename TI, typename TO, typename TINTERP> /* class scope for passing in functions into templates */ struct InterpCompute { template<typename TC, typename TINTERP> static inline void Process(TO* const out, TC interpolatep(TC coef_0, TC coef_1, TINTERP lerp) { return interpolate(coef_0, coef_1, lerp); } template<typename TC, typename TINTERP> static inline TC interpolaten(TC coef_0, TC coef_1, TINTERP lerp) { return interpolate(coef_0, coef_1, lerp); } }; struct InterpNull { template<typename TC, typename TINTERP> static inline TC interpolatep(TC coef_0, TC coef_1 __unused, TINTERP lerp __unused) { return coef_0; } template<typename TC, typename TINTERP> static inline TC interpolaten(TC coef_0 __unused, TC coef_1, TINTERP lerp __unused) { return coef_1; } }; /* * Calculates a single output frame (two samples). * * The Process*() functions compute both the positive half FIR dot product and * the negative half FIR dot product, accumulates, and then applies the volume. * * Use fir() to compute the proper coefficient pointers for a polyphase * filter bank. * * ProcessBase() is the fundamental processing template function. * * ProcessL() calls ProcessBase() with TFUNC = InterpNull, for fixed/locked phase. * Process() calls ProcessBase() with TFUNC = InterpCompute, for interpolated phase. */ template <int CHANNELS, int STRIDE, typename TFUNC, typename TC, typename TI, typename TO, typename TINTERP> static inline void ProcessBase(TO* const out, int count, const TC* coefsP, const TC* coefsN, const TC* coefsP1 __unused, const TC* coefsN1 __unused, const TI* sP, const TI* sN, TINTERP lerpP, const TO* const volumeLR) { COMPILE_TIME_ASSERT_FUNCTION_SCOPE(CHANNELS >= 1 && CHANNELS <= 2) adjustLerp<TC, TINTERP>(lerpP); // coefficient type adjustment for interpolation COMPILE_TIME_ASSERT_FUNCTION_SCOPE(CHANNELS > 0) if (CHANNELS == 2) { if (CHANNELS > 2) { // TO accum[CHANNELS]; Accumulator<CHANNELS, TO> accum; // for (int j = 0; j < CHANNELS; ++j) accum[j] = 0; accum.clear(); for (size_t i = 0; i < count; ++i) { TC c = TFUNC::interpolatep(coefsP[0], coefsP[count], lerpP); // for (int j = 0; j < CHANNELS; ++j) mac(accum[j], c, sP + j); const TI *tmp_data = sP; // tmp_ptr seems to work better accum.acc(c, tmp_data); coefsP++; sP -= CHANNELS; c = TFUNC::interpolaten(coefsN[count], coefsN[0], lerpP); // for (int j = 0; j < CHANNELS; ++j) mac(accum[j], c, sN + j); tmp_data = sN; // tmp_ptr seems faster than directly using sN accum.acc(c, tmp_data); coefsN++; sN += CHANNELS; } // for (int j = 0; j < CHANNELS; ++j) out[j] += volumeAdjust(accum[j], volumeLR[0]); TO *tmp_out = out; // may remove if const out definition changes. accum.volume(tmp_out, volumeLR[0]); } else if (CHANNELS == 2) { TO l = 0; TO r = 0; for (size_t i = 0; i < count; ++i) { mac(l, r, interpolate(coefsP[0], coefsP[count], lerpP), sP); mac(l, r, TFUNC::interpolatep(coefsP[0], coefsP[count], lerpP), sP); coefsP++; sP -= CHANNELS; mac(l, r, interpolate(coefsN[count], coefsN[0], lerpP), sN); mac(l, r, TFUNC::interpolaten(coefsN[count], coefsN[0], lerpP), sN); coefsN++; sN += CHANNELS; } Loading @@ -191,10 +245,10 @@ void Process(TO* const out, } else { /* CHANNELS == 1 */ TO l = 0; for (size_t i = 0; i < count; ++i) { mac(l, interpolate(coefsP[0], coefsP[count], lerpP), sP); mac(l, TFUNC::interpolatep(coefsP[0], coefsP[count], lerpP), sP); coefsP++; sP -= CHANNELS; mac(l, interpolate(coefsN[count], coefsN[0], lerpP), sN); mac(l, TFUNC::interpolaten(coefsN[count], coefsN[0], lerpP), sN); coefsN++; sN += CHANNELS; } Loading @@ -203,6 +257,36 @@ void Process(TO* const out, } } template <int CHANNELS, int STRIDE, typename TC, typename TI, typename TO> static inline void ProcessL(TO* const out, int count, const TC* coefsP, const TC* coefsN, const TI* sP, const TI* sN, const TO* const volumeLR) { ProcessBase<CHANNELS, STRIDE, InterpNull>(out, count, coefsP, coefsN, sP, sN, 0, volumeLR); } template <int CHANNELS, int STRIDE, typename TC, typename TI, typename TO, typename TINTERP> static inline void Process(TO* const out, int count, const TC* coefsP, const TC* coefsN, const TC* coefsP1 __unused, const TC* coefsN1 __unused, const TI* sP, const TI* sN, TINTERP lerpP, const TO* const volumeLR) { adjustLerp<TC, TINTERP>(lerpP); // coefficient type adjustment for interpolations ProcessBase<CHANNELS, STRIDE, InterpCompute>(out, count, coefsP, coefsN, sP, sN, lerpP, volumeLR); } /* * Calculates a single output frame (two samples) from input sample pointer. * Loading