Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 6a3eb6a1 authored by Andy Hung's avatar Andy Hung Committed by Android (Google) Code Review
Browse files

Merge "Add multichannel to audio resample processing"

parents 8382a0da 68ffa200
Loading
Loading
Loading
Loading
+146 −62
Original line number Diff line number Diff line
@@ -44,14 +44,14 @@ static inline
void mac(float& l, float& r, TC coef,  const float* samples)
{
    l += *samples++ * coef;
    r += *samples++ * coef;
    r += *samples * coef;
}

template<typename TC>
static inline
void mac(float& l, TC coef,  const float* samples)
{
    l += *samples++ * coef;
    l += *samples * coef;
}

/* variant for output type TO = int32_t output samples */
@@ -69,62 +69,48 @@ float volumeAdjust(float value, float volume)
}

/*
 * Calculates a single output frame (two samples).
 *
 * This function computes both the positive half FIR dot product and
 * the negative half FIR dot product, accumulates, and then applies the volume.
 * Helper template functions for loop unrolling accumulator operations.
 *
 * This is a locked phase filter (it does not compute the interpolation).
 *
 * Use fir() to compute the proper coefficient pointers for a polyphase
 * filter bank.
 * Unrolling the loops achieves about 2x gain.
 * Using a recursive template rather than an array of TO[] for the accumulator
 * values is an additional 10-20% gain.
 */

template <int CHANNELS, int STRIDE, typename TC, typename TI, typename TO>
static inline
void ProcessL(TO* const out,
        int count,
        const TC* coefsP,
        const TC* coefsN,
        const TI* sP,
        const TI* sN,
        const TO* const volumeLR)
template<int CHANNELS, typename TO>
class Accumulator : public Accumulator<CHANNELS-1, TO> // recursive
{
    COMPILE_TIME_ASSERT_FUNCTION_SCOPE(CHANNELS >= 1 && CHANNELS <= 2)
    if (CHANNELS == 2) {
        TO l = 0;
        TO r = 0;
        do {
            mac(l, r, *coefsP++, sP);
            sP -= CHANNELS;
            mac(l, r, *coefsN++, sN);
            sN += CHANNELS;
        } while (--count > 0);
        out[0] += volumeAdjust(l, volumeLR[0]);
        out[1] += volumeAdjust(r, volumeLR[1]);
    } else { /* CHANNELS == 1 */
        TO l = 0;
        do {
            mac(l, *coefsP++, sP);
            sP -= CHANNELS;
            mac(l, *coefsN++, sN);
            sN += CHANNELS;
        } while (--count > 0);
        out[0] += volumeAdjust(l, volumeLR[0]);
        out[1] += volumeAdjust(l, volumeLR[1]);
public:
    inline void clear() {
        value = 0;
        Accumulator<CHANNELS-1, TO>::clear();
    }
    template<typename TC, typename TI>
    inline void acc(TC coef, const TI*& data) {
        mac(value, coef, data++);
        Accumulator<CHANNELS-1, TO>::acc(coef, data);
    }
    inline void volume(TO*& out, TO gain) {
        *out++ = volumeAdjust(value, gain);
        Accumulator<CHANNELS-1, TO>::volume(out, gain);
    }

    TO value; // one per recursive inherited base class
};

template<typename TO>
class Accumulator<0, TO> {
public:
    inline void clear() {
    }
    template<typename TC, typename TI>
    inline void acc(TC coef __unused, const TI*& data __unused) {
    }
    inline void volume(TO*& out __unused, TO gain __unused) {
    }
};

/*
 * Calculates a single output frame (two samples) interpolating phase.
 *
 * This function computes both the positive half FIR dot product and
 * the negative half FIR dot product, accumulates, and then applies the volume.
 *
 * This is an interpolated phase filter.
 *
 * Use fir() to compute the proper coefficient pointers for a polyphase
 * filter bank.
 * Helper template functions for interpolating filter coefficients.
 */

template<typename TC, typename T>
@@ -159,30 +145,98 @@ int32_t interpolate(int32_t coef_0, int32_t coef_1, uint32_t lerp)
    return mulAdd(static_cast<int16_t>(lerp), (coef_1-coef_0)<<1, coef_0);
}

template <int CHANNELS, int STRIDE, typename TC, typename TI, typename TO, typename TINTERP>
/* class scope for passing in functions into templates */
struct InterpCompute {
    template<typename TC, typename TINTERP>
    static inline
void Process(TO* const out,
    TC interpolatep(TC coef_0, TC coef_1, TINTERP lerp) {
        return interpolate(coef_0, coef_1, lerp);
    }

    template<typename TC, typename TINTERP>
    static inline
    TC interpolaten(TC coef_0, TC coef_1, TINTERP lerp) {
        return interpolate(coef_0, coef_1, lerp);
    }
};

struct InterpNull {
    template<typename TC, typename TINTERP>
    static inline
    TC interpolatep(TC coef_0, TC coef_1 __unused, TINTERP lerp __unused) {
        return coef_0;
    }

    template<typename TC, typename TINTERP>
    static inline
    TC interpolaten(TC coef_0 __unused, TC coef_1, TINTERP lerp __unused) {
        return coef_1;
    }
};

/*
 * Calculates a single output frame (two samples).
 *
 * The Process*() functions compute both the positive half FIR dot product and
 * the negative half FIR dot product, accumulates, and then applies the volume.
 *
 * Use fir() to compute the proper coefficient pointers for a polyphase
 * filter bank.
 *
 * ProcessBase() is the fundamental processing template function.
 *
 * ProcessL() calls ProcessBase() with TFUNC = InterpNull, for fixed/locked phase.
 * Process() calls ProcessBase() with TFUNC = InterpCompute, for interpolated phase.
 */

template <int CHANNELS, int STRIDE, typename TFUNC, typename TC, typename TI, typename TO, typename TINTERP>
static inline
void ProcessBase(TO* const out,
        int count,
        const TC* coefsP,
        const TC* coefsN,
        const TC* coefsP1 __unused,
        const TC* coefsN1 __unused,
        const TI* sP,
        const TI* sN,
        TINTERP lerpP,
        const TO* const volumeLR)
{
    COMPILE_TIME_ASSERT_FUNCTION_SCOPE(CHANNELS >= 1 && CHANNELS <= 2)
    adjustLerp<TC, TINTERP>(lerpP); // coefficient type adjustment for interpolation
    COMPILE_TIME_ASSERT_FUNCTION_SCOPE(CHANNELS > 0)

    if (CHANNELS == 2) {
    if (CHANNELS > 2) {
        // TO accum[CHANNELS];
        Accumulator<CHANNELS, TO> accum;

        // for (int j = 0; j < CHANNELS; ++j) accum[j] = 0;
        accum.clear();
        for (size_t i = 0; i < count; ++i) {
            TC c = TFUNC::interpolatep(coefsP[0], coefsP[count], lerpP);

            // for (int j = 0; j < CHANNELS; ++j) mac(accum[j], c, sP + j);
            const TI *tmp_data = sP; // tmp_ptr seems to work better
            accum.acc(c, tmp_data);

            coefsP++;
            sP -= CHANNELS;
            c = TFUNC::interpolaten(coefsN[count], coefsN[0], lerpP);

            // for (int j = 0; j < CHANNELS; ++j) mac(accum[j], c, sN + j);
            tmp_data = sN; // tmp_ptr seems faster than directly using sN
            accum.acc(c, tmp_data);

            coefsN++;
            sN += CHANNELS;
        }
        // for (int j = 0; j < CHANNELS; ++j) out[j] += volumeAdjust(accum[j], volumeLR[0]);
        TO *tmp_out = out; // may remove if const out definition changes.
        accum.volume(tmp_out, volumeLR[0]);
    } else if (CHANNELS == 2) {
        TO l = 0;
        TO r = 0;
        for (size_t i = 0; i < count; ++i) {
            mac(l, r, interpolate(coefsP[0], coefsP[count], lerpP), sP);
            mac(l, r, TFUNC::interpolatep(coefsP[0], coefsP[count], lerpP), sP);
            coefsP++;
            sP -= CHANNELS;
            mac(l, r, interpolate(coefsN[count], coefsN[0], lerpP), sN);
            mac(l, r, TFUNC::interpolaten(coefsN[count], coefsN[0], lerpP), sN);
            coefsN++;
            sN += CHANNELS;
        }
@@ -191,10 +245,10 @@ void Process(TO* const out,
    } else { /* CHANNELS == 1 */
        TO l = 0;
        for (size_t i = 0; i < count; ++i) {
            mac(l, interpolate(coefsP[0], coefsP[count], lerpP), sP);
            mac(l, TFUNC::interpolatep(coefsP[0], coefsP[count], lerpP), sP);
            coefsP++;
            sP -= CHANNELS;
            mac(l, interpolate(coefsN[count], coefsN[0], lerpP), sN);
            mac(l, TFUNC::interpolaten(coefsN[count], coefsN[0], lerpP), sN);
            coefsN++;
            sN += CHANNELS;
        }
@@ -203,6 +257,36 @@ void Process(TO* const out,
    }
}

template <int CHANNELS, int STRIDE, typename TC, typename TI, typename TO>
static inline
void ProcessL(TO* const out,
        int count,
        const TC* coefsP,
        const TC* coefsN,
        const TI* sP,
        const TI* sN,
        const TO* const volumeLR)
{
    ProcessBase<CHANNELS, STRIDE, InterpNull>(out, count, coefsP, coefsN, sP, sN, 0, volumeLR);
}

template <int CHANNELS, int STRIDE, typename TC, typename TI, typename TO, typename TINTERP>
static inline
void Process(TO* const out,
        int count,
        const TC* coefsP,
        const TC* coefsN,
        const TC* coefsP1 __unused,
        const TC* coefsN1 __unused,
        const TI* sP,
        const TI* sN,
        TINTERP lerpP,
        const TO* const volumeLR)
{
    adjustLerp<TC, TINTERP>(lerpP); // coefficient type adjustment for interpolations
    ProcessBase<CHANNELS, STRIDE, InterpCompute>(out, count, coefsP, coefsN, sP, sN, lerpP, volumeLR);
}

/*
 * Calculates a single output frame (two samples) from input sample pointer.
 *