Add multichannel to audio resample processing (68ffa200) · Commits · e / os / android_frameworks_av

services/audioflinger/AudioResamplerFirProcess.h

+146 −62

Original line number	Original line	Diff line number	Diff line
	@@ -44,14 +44,14 @@ static inline
	void mac(float& l, float& r, TC coef, const float* samples)		void mac(float& l, float& r, TC coef, const float* samples)
	{		{
	l += *samples++ * coef;		l += *samples++ * coef;
	r += *samples++ * coef;		r += *samples * coef;
	}		}

	template<typename TC>		template<typename TC>
	static inline		static inline
	void mac(float& l, TC coef, const float* samples)		void mac(float& l, TC coef, const float* samples)
	{		{
	l += *samples++ * coef;		l += *samples * coef;
	}		}

	/* variant for output type TO = int32_t output samples */		/* variant for output type TO = int32_t output samples */
	@@ -69,62 +69,48 @@ float volumeAdjust(float value, float volume)
	}		}

	/*		/*
	* Calculates a single output frame (two samples).		* Helper template functions for loop unrolling accumulator operations.
	*
	* This function computes both the positive half FIR dot product and
	* the negative half FIR dot product, accumulates, and then applies the volume.
	*		*
	* This is a locked phase filter (it does not compute the interpolation).		* Unrolling the loops achieves about 2x gain.
	*		* Using a recursive template rather than an array of TO[] for the accumulator
	* Use fir() to compute the proper coefficient pointers for a polyphase		* values is an additional 10-20% gain.
	* filter bank.
	*/		*/

	template <int CHANNELS, int STRIDE, typename TC, typename TI, typename TO>		template<int CHANNELS, typename TO>
	static inline		class Accumulator : public Accumulator<CHANNELS-1, TO> // recursive
	void ProcessL(TO* const out,
	int count,
	const TC* coefsP,
	const TC* coefsN,
	const TI* sP,
	const TI* sN,
	const TO* const volumeLR)
	{		{
	COMPILE_TIME_ASSERT_FUNCTION_SCOPE(CHANNELS >= 1 && CHANNELS <= 2)		public:
	if (CHANNELS == 2) {		inline void clear() {
	TO l = 0;		value = 0;
	TO r = 0;		Accumulator<CHANNELS-1, TO>::clear();
	do {
	mac(l, r, *coefsP++, sP);
	sP -= CHANNELS;
	mac(l, r, *coefsN++, sN);
	sN += CHANNELS;
	} while (--count > 0);
	out[0] += volumeAdjust(l, volumeLR[0]);
	out[1] += volumeAdjust(r, volumeLR[1]);
	} else { /* CHANNELS == 1 */
	TO l = 0;
	do {
	mac(l, *coefsP++, sP);
	sP -= CHANNELS;
	mac(l, *coefsN++, sN);
	sN += CHANNELS;
	} while (--count > 0);
	out[0] += volumeAdjust(l, volumeLR[0]);
	out[1] += volumeAdjust(l, volumeLR[1]);
	}		}
			template<typename TC, typename TI>
			inline void acc(TC coef, const TI*& data) {
			mac(value, coef, data++);
			Accumulator<CHANNELS-1, TO>::acc(coef, data);
	}		}
			inline void volume(TO*& out, TO gain) {
			*out++ = volumeAdjust(value, gain);
			Accumulator<CHANNELS-1, TO>::volume(out, gain);
			}

			TO value; // one per recursive inherited base class
			};

			/*
			 * Terminating specialization of the recursive Accumulator template.
			 *
			 * Accumulator<CHANNELS, TO> forwards clear(), acc() and volume() to
			 * Accumulator<CHANNELS-1, TO>; this CHANNELS == 0 case ends that chain.
			 * It stores no value and every operation is a no-op.
			 */
			template<typename TO>
			class Accumulator<0, TO> {
			public:
			    /* Nothing to reset: no accumulator value is held at this level. */
			    void clear() {}

			    /* End of the per-channel accumulate chain; arguments are ignored. */
			    template<typename TC, typename TI>
			    void acc(TC coef __unused, const TI*& data __unused) {}

			    /* End of the per-channel output chain; arguments are ignored. */
			    void volume(TO*& out __unused, TO gain __unused) {}
			};

	/*		/*
	* Calculates a single output frame (two samples) interpolating phase.		* Helper template functions for interpolating filter coefficients.
	*
	* This function computes both the positive half FIR dot product and
	* the negative half FIR dot product, accumulates, and then applies the volume.
	*
	* This is an interpolated phase filter.
	*
	* Use fir() to compute the proper coefficient pointers for a polyphase
	* filter bank.
	*/		*/

	template<typename TC, typename T>		template<typename TC, typename T>
	@@ -159,30 +145,98 @@ int32_t interpolate(int32_t coef_0, int32_t coef_1, uint32_t lerp)
	return mulAdd(static_cast<int16_t>(lerp), (coef_1-coef_0)<<1, coef_0);		return mulAdd(static_cast<int16_t>(lerp), (coef_1-coef_0)<<1, coef_0);
	}		}

	template <int CHANNELS, int STRIDE, typename TC, typename TI, typename TO, typename TINTERP>		/* class scope for passing in functions into templates */
			struct InterpCompute {
			template<typename TC, typename TINTERP>
	static inline		static inline
	void Process(TO* const out,		TC interpolatep(TC coef_0, TC coef_1, TINTERP lerp) {
			return interpolate(coef_0, coef_1, lerp);
			}

			template<typename TC, typename TINTERP>
			static inline
			TC interpolaten(TC coef_0, TC coef_1, TINTERP lerp) {
			return interpolate(coef_0, coef_1, lerp);
			}
			};

			/*
			 * Coefficient policy that performs no interpolation.
			 *
			 * Offers the same interpolatep()/interpolaten() interface as
			 * InterpCompute so it can be passed as TFUNC to ProcessBase(), but
			 * simply selects one of the two coefficients and ignores lerp.
			 */
			struct InterpNull {
			    /* Positive-half coefficient: returns coef_0 unchanged. */
			    template<typename TC, typename TINTERP>
			    static TC interpolatep(TC coef_0, TC coef_1 __unused, TINTERP lerp __unused)
			    {
			        return coef_0;
			    }

			    /* Negative-half coefficient: returns coef_1 unchanged. */
			    template<typename TC, typename TINTERP>
			    static TC interpolaten(TC coef_0 __unused, TC coef_1, TINTERP lerp __unused)
			    {
			        return coef_1;
			    }
			};

			/*
			* Calculates a single output frame (two samples, or CHANNELS samples
			* when CHANNELS > 2).
			*
			* The Process*() functions compute both the positive half FIR dot product and
			* the negative half FIR dot product, accumulates, and then applies the volume.
			*
			* Use fir() to compute the proper coefficient pointers for a polyphase
			* filter bank.
			*
			* ProcessBase() is the fundamental processing template function.
			*
			* ProcessL() calls ProcessBase() with TFUNC = InterpNull, for fixed/locked phase.
			* Process() calls ProcessBase() with TFUNC = InterpCompute, for interpolated phase.
			*/

			template <int CHANNELS, int STRIDE, typename TFUNC, typename TC, typename TI, typename TO, typename TINTERP>
			static inline
			void ProcessBase(TO* const out,
	int count,		int count,
	const TC* coefsP,		const TC* coefsP,
	const TC* coefsN,		const TC* coefsN,
	const TC* coefsP1 __unused,
	const TC* coefsN1 __unused,
	const TI* sP,		const TI* sP,
	const TI* sN,		const TI* sN,
	TINTERP lerpP,		TINTERP lerpP,
	const TO* const volumeLR)		const TO* const volumeLR)
	{		{
	COMPILE_TIME_ASSERT_FUNCTION_SCOPE(CHANNELS >= 1 && CHANNELS <= 2)		COMPILE_TIME_ASSERT_FUNCTION_SCOPE(CHANNELS > 0)
	adjustLerp<TC, TINTERP>(lerpP); // coefficient type adjustment for interpolation

	if (CHANNELS == 2) {		if (CHANNELS > 2) {
			// TO accum[CHANNELS];
			Accumulator<CHANNELS, TO> accum;

			// for (int j = 0; j < CHANNELS; ++j) accum[j] = 0;
			accum.clear();
			for (size_t i = 0; i < count; ++i) {
			TC c = TFUNC::interpolatep(coefsP[0], coefsP[count], lerpP);

			// for (int j = 0; j < CHANNELS; ++j) mac(accum[j], c, sP + j);
			const TI *tmp_data = sP; // tmp_ptr seems to work better
			accum.acc(c, tmp_data);

			coefsP++;
			sP -= CHANNELS;
			c = TFUNC::interpolaten(coefsN[count], coefsN[0], lerpP);

			// for (int j = 0; j < CHANNELS; ++j) mac(accum[j], c, sN + j);
			tmp_data = sN; // tmp_ptr seems faster than directly using sN
			accum.acc(c, tmp_data);

			coefsN++;
			sN += CHANNELS;
			}
			// for (int j = 0; j < CHANNELS; ++j) out[j] += volumeAdjust(accum[j], volumeLR[0]);
			TO *tmp_out = out; // may remove if const out definition changes.
			accum.volume(tmp_out, volumeLR[0]);
			} else if (CHANNELS == 2) {
	TO l = 0;		TO l = 0;
	TO r = 0;		TO r = 0;
	for (size_t i = 0; i < count; ++i) {		for (size_t i = 0; i < count; ++i) {
	mac(l, r, interpolate(coefsP[0], coefsP[count], lerpP), sP);		mac(l, r, TFUNC::interpolatep(coefsP[0], coefsP[count], lerpP), sP);
	coefsP++;		coefsP++;
	sP -= CHANNELS;		sP -= CHANNELS;
	mac(l, r, interpolate(coefsN[count], coefsN[0], lerpP), sN);		mac(l, r, TFUNC::interpolaten(coefsN[count], coefsN[0], lerpP), sN);
	coefsN++;		coefsN++;
	sN += CHANNELS;		sN += CHANNELS;
	}		}
	@@ -191,10 +245,10 @@ void Process(TO* const out,
	} else { /* CHANNELS == 1 */		} else { /* CHANNELS == 1 */
	TO l = 0;		TO l = 0;
	for (size_t i = 0; i < count; ++i) {		for (size_t i = 0; i < count; ++i) {
	mac(l, interpolate(coefsP[0], coefsP[count], lerpP), sP);		mac(l, TFUNC::interpolatep(coefsP[0], coefsP[count], lerpP), sP);
	coefsP++;		coefsP++;
	sP -= CHANNELS;		sP -= CHANNELS;
	mac(l, interpolate(coefsN[count], coefsN[0], lerpP), sN);		mac(l, TFUNC::interpolaten(coefsN[count], coefsN[0], lerpP), sN);
	coefsN++;		coefsN++;
	sN += CHANNELS;		sN += CHANNELS;
	}		}
	@@ -203,6 +257,36 @@ void Process(TO* const out,
	}		}
	}		}

			/*
			 * Fixed (locked) phase variant.
			 *
			 * Forwards every argument to ProcessBase() with TFUNC = InterpNull,
			 * so the coefficients are used as-is. A literal 0 is passed in the
			 * interpolation slot because InterpNull ignores it.
			 */
			template <int CHANNELS, int STRIDE, typename TC, typename TI, typename TO>
			static inline void ProcessL(TO* const out,
			        int count,
			        const TC* coefsP,
			        const TC* coefsN,
			        const TI* sP,
			        const TI* sN,
			        const TO* const volumeLR)
			{
			    ProcessBase<CHANNELS, STRIDE, InterpNull>(
			            out, count,
			            coefsP, coefsN,
			            sP, sN,
			            0 /* lerp: ignored by InterpNull */,
			            volumeLR);
			}

			/*
			 * Interpolated phase variant.
			 *
			 * Adjusts lerpP for the coefficient type via adjustLerp(), then
			 * forwards to ProcessBase() with TFUNC = InterpCompute so each
			 * coefficient is interpolated before use.
			 *
			 * coefsP1 and coefsN1 are accepted for the caller's signature but are
			 * not used here.
			 */
			template <int CHANNELS, int STRIDE, typename TC, typename TI, typename TO, typename TINTERP>
			static inline void Process(TO* const out,
			        int count,
			        const TC* coefsP,
			        const TC* coefsN,
			        const TC* coefsP1 __unused,
			        const TC* coefsN1 __unused,
			        const TI* sP,
			        const TI* sN,
			        TINTERP lerpP,
			        const TO* const volumeLR)
			{
			    // coefficient type adjustment for interpolations
			    adjustLerp<TC, TINTERP>(lerpP);

			    ProcessBase<CHANNELS, STRIDE, InterpCompute>(
			            out, count,
			            coefsP, coefsN,
			            sP, sN,
			            lerpP,
			            volumeLR);
			}

	/*		/*
	* Calculates a single output frame (two samples) from input sample pointer.		* Calculates a single output frame (two samples) from input sample pointer.
	*		*