Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 1c2689ef authored by Phil Burk's avatar Phil Burk
Browse files

aaudio: handle DSP stalls in clock model

The DSP can sometimes stall while playing.
This might happen when switching internal routes.
That can cause a large jump forward in the timestamps
passed to the IsochronousCLockModel.

When that happens we need to immediately move
the window of the clock model forward and let it
recalibrate using early timestamps.

Otherwise we can end up with the DSP in the middle of the buffer
and a clock model that is drifting too slowly towards
a correct timing window.
That can result in buzzing and clicks depending on the BufferSize.

Also fix test_clock_model.cpp
Added better simulation of timestamp jitter.
Look for position in a range.
Drift more slowly.
Simulate a pause in the DSP.

Bug: 264949085
Bug: 214121016
Bug: 200134791
Test: See bug for repro steps
Test: atest AAudioTests
Test: atest test_clock_model
Change-Id: I9edf4d71ca99ef5827c3c368603a96c52491aad6
parent d24e38bb
Loading
Loading
Loading
Loading
+87 −38
Original line number Diff line number Diff line
@@ -43,12 +43,12 @@ using namespace android::audio_utils;
// and dumped to the log when the stream is stopped.

IsochronousClockModel::IsochronousClockModel()
        : mLatenessForDriftNanos(kInitialLatenessForDriftNanos)
{
    if ((AAudioProperty_getLogMask() & AAUDIO_LOG_CLOCK_MODEL_HISTOGRAM) != 0) {
        mHistogramMicros = std::make_unique<Histogram>(kHistogramBinCount,
                kHistogramBinWidthMicros);
    }
    update();
}

void IsochronousClockModel::setPositionAndTime(int64_t framePosition, int64_t nanoTime) {
@@ -61,15 +61,19 @@ void IsochronousClockModel::start(int64_t nanoTime) {
    ALOGV("start(nanos = %lld)\n", (long long) nanoTime);
    mMarkerNanoTime = nanoTime;
    mState = STATE_STARTING;
    mConsecutiveVeryLateCount = 0;
    mDspStallCount = 0;
    if (mHistogramMicros) {
        mHistogramMicros->clear();
    }
}

void IsochronousClockModel::stop(int64_t nanoTime) {
    ALOGD("stop(nanos = %lld) max lateness = %d micros\n",
    ALOGD("stop(nanos = %lld) max lateness = %d micros, DSP stalled %d times",
          (long long) nanoTime,
        (int) (mMaxMeasuredLatenessNanos / 1000));
          (int) (mMaxMeasuredLatenessNanos / 1000),
          mDspStallCount
    );
    setPositionAndTime(convertTimeToPosition(nanoTime), nanoTime);
    // TODO should we set position?
    mState = STATE_STOPPED;
@@ -108,7 +112,9 @@ void IsochronousClockModel::processTimestamp(int64_t framePosition, int64_t nano

//    ALOGD("processTimestamp() - mSampleRate = %d", mSampleRate);
//    ALOGD("processTimestamp() - mState = %d", mState);
    // Lateness relative to the start of the window.
    int64_t latenessNanos = nanosDelta - expectedNanosDelta;
    int32_t nextConsecutiveVeryLateCount = 0;
    switch (mState) {
    case STATE_STOPPED:
        break;
@@ -137,58 +143,94 @@ void IsochronousClockModel::processTimestamp(int64_t framePosition, int64_t nano
            // Or we may be drifting due to a fast HW clock.
            setPositionAndTime(framePosition, nanoTime);
#if ICM_LOG_DRIFT
            int earlyDeltaMicros = (int) ((expectedNanosDelta - nanosDelta)/ 1000);
            ALOGD("%s() - STATE_RUNNING - #%d, %4d micros EARLY",
            int earlyDeltaMicros = (int) ((expectedNanosDelta - nanosDelta)
                    / AAUDIO_NANOS_PER_MICROSECOND);
            ALOGD("%s() - STATE_RUNNING - #%d, %5d micros EARLY",
                __func__, mTimestampCount, earlyDeltaMicros);
#endif
        } else if (latenessNanos > mLatenessForDriftNanos) {
            // When we are on the late side, it may be because of preemption in the kernel,
            // or timing jitter caused by resampling in the DSP,
            // or we may be drifting due to a slow HW clock.
            // We add slight drift value just in case there is actual long term drift
            // forward caused by a slower clock.
            // If the clock is faster than the model will get pushed earlier
            // by the code in the earlier branch.
            // The two opposing forces should allow the model to track the real clock
            // over a long time.
            int64_t driftingTime = mMarkerNanoTime + expectedNanosDelta + kDriftNanos;
            setPositionAndTime(framePosition,  driftingTime);
#if ICM_LOG_DRIFT
            ALOGD("%s() - STATE_RUNNING - #%d, DRIFT, lateness = %d micros",
        } else if (latenessNanos > mLatenessForJumpNanos) {
            ALOGD("%s() - STATE_RUNNING - #%d, %5d micros VERY LATE, %d times",
                  __func__,
                  mTimestampCount,
                  (int) (latenessNanos / 1000));
#endif
                  (int) (latenessNanos / AAUDIO_NANOS_PER_MICROSECOND),
                  mConsecutiveVeryLateCount
            );
            // A lateness this large is probably due to a stall in the DSP.
            // A pause causes a persistent lateness so we can detect it by counting
            // consecutive late timestamps.
            if (mConsecutiveVeryLateCount >= kVeryLateCountsNeededToTriggerJump) {
                // Assume the timestamp is valid and let subsequent EARLY timestamps
                // move the window quickly to the correct place.
                setPositionAndTime(framePosition, nanoTime); // JUMP!
                mDspStallCount++;
                // Throttle the warnings but do not silence them.
                // They indicate a bug that needs to be fixed!
                if ((nanoTime - mLastJumpWarningTimeNanos) > AAUDIO_NANOS_PER_SECOND) {
                    ALOGW("%s() - STATE_RUNNING - #%d, %5d micros VERY LATE! Force window jump"
                          ", mDspStallCount = %d",
                          __func__,
                          mTimestampCount,
                          (int) (latenessNanos / AAUDIO_NANOS_PER_MICROSECOND),
                          mDspStallCount
                    );
                    mLastJumpWarningTimeNanos = nanoTime;
                }
            } else {
                nextConsecutiveVeryLateCount = mConsecutiveVeryLateCount + 1;
                driftForward(latenessNanos, expectedNanosDelta, framePosition);
            }
        } else if (latenessNanos > mLatenessForDriftNanos) {
            driftForward(latenessNanos, expectedNanosDelta, framePosition);
        }
        mConsecutiveVeryLateCount = nextConsecutiveVeryLateCount;

        // Modify mMaxMeasuredLatenessNanos.
        // This affects the "late" side of the window, which controls input glitches.
        if (latenessNanos > mMaxMeasuredLatenessNanos) { // increase
#if ICM_LOG_DRIFT
            ALOGD("%s() - STATE_RUNNING - #%d, newmax %d - oldmax %d = %4d micros LATE",
            ALOGD("%s() - STATE_RUNNING - #%d, newmax %d, oldmax %d micros LATE",
                    __func__,
                    mTimestampCount,
                    (int) (latenessNanos / 1000),
                    mMaxMeasuredLatenessNanos / 1000,
                    (int) ((latenessNanos - mMaxMeasuredLatenessNanos) / 1000)
                    (int) (latenessNanos / AAUDIO_NANOS_PER_MICROSECOND),
                    (int) (mMaxMeasuredLatenessNanos / AAUDIO_NANOS_PER_MICROSECOND)
                    );
#endif
            mMaxMeasuredLatenessNanos = (int32_t) latenessNanos;
            // Calculate upper region that will trigger a drift forwards.
            mLatenessForDriftNanos = mMaxMeasuredLatenessNanos - (mMaxMeasuredLatenessNanos >> 4);
        } else { // decrease
            // If this is an outlier in lateness then mMaxMeasuredLatenessNanos can go high
            // and stay there. So we slowly reduce mMaxMeasuredLatenessNanos for better
            // long term stability. The two opposing forces will keep mMaxMeasuredLatenessNanos
            // within a reasonable range.
            mMaxMeasuredLatenessNanos -= kDriftNanos;
        }

        break;
    default:
        break;
    }
}

// When we are on the late side, it may be because of preemption in the kernel,
// or timing jitter caused by resampling in the DSP,
// or we may be drifting due to a slow HW clock.
// We add slight drift value just in case there is actual long term drift
// forward caused by a slower clock.
// If the clock is faster than the model will get pushed earlier
// by the code in the earlier branch.
// The two opposing forces should allow the model to track the real clock
// over a long time.
void IsochronousClockModel::driftForward(int64_t latenessNanos,
                                         int64_t expectedNanosDelta,
                                         int64_t framePosition) {
    const int64_t driftNanos = (latenessNanos - mLatenessForDriftNanos) >> kShifterForDrift;
    const int64_t minDriftNanos = std::min(driftNanos, kMaxDriftNanos);
    const int64_t expectedMarkerNanoTime = mMarkerNanoTime + expectedNanosDelta;
    const int64_t driftedTime = expectedMarkerNanoTime + minDriftNanos;
    setPositionAndTime(framePosition, driftedTime);
#if ICM_LOG_DRIFT
    ALOGD("%s() - STATE_RUNNING - #%d, %5d micros LATE, nudge window forward by %d micros",
          __func__,
          mTimestampCount,
          (int) (latenessNanos / AAUDIO_NANOS_PER_MICROSECOND),
          (int) (minDriftNanos / AAUDIO_NANOS_PER_MICROSECOND)
    );
#endif
}

void IsochronousClockModel::setSampleRate(int32_t sampleRate) {
    mSampleRate = sampleRate;
    update();
@@ -197,11 +239,18 @@ void IsochronousClockModel::setSampleRate(int32_t sampleRate) {
void IsochronousClockModel::setFramesPerBurst(int32_t framesPerBurst) {
    mFramesPerBurst = framesPerBurst;
    update();
    ALOGD("%s() - mFramesPerBurst = %d - mBurstPeriodNanos = %" PRId64,
          __func__,
          mFramesPerBurst,
          mBurstPeriodNanos
          );
}

// Update expected lateness based on sampleRate and framesPerBurst
void IsochronousClockModel::update() {
    mBurstPeriodNanos = convertDeltaPositionToTime(mFramesPerBurst); // uses mSampleRate
    mBurstPeriodNanos = convertDeltaPositionToTime(mFramesPerBurst);
    mLatenessForDriftNanos = mBurstPeriodNanos + kLatenessMarginForSchedulingJitter;
    mLatenessForJumpNanos = mLatenessForDriftNanos * kScalerForJumpLateness;
}

int64_t IsochronousClockModel::convertDeltaPositionToTime(int64_t framesDelta) const {
@@ -257,11 +306,11 @@ int64_t IsochronousClockModel::convertLatestTimeToPosition(int64_t nanoTime) con
}

void IsochronousClockModel::dump() const {
    ALOGD("mMarkerFramePosition = %" PRIu64, mMarkerFramePosition);
    ALOGD("mMarkerNanoTime      = %" PRIu64, mMarkerNanoTime);
    ALOGD("mMarkerFramePosition = %" PRId64, mMarkerFramePosition);
    ALOGD("mMarkerNanoTime      = %" PRId64, mMarkerNanoTime);
    ALOGD("mSampleRate          = %6d", mSampleRate);
    ALOGD("mFramesPerBurst      = %6d", mFramesPerBurst);
    ALOGD("mMaxMeasuredLatenessNanos = %6d", mMaxMeasuredLatenessNanos);
    ALOGD("mMaxMeasuredLatenessNanos = %6" PRId64, mMaxMeasuredLatenessNanos);
    ALOGD("mState               = %6d", mState);
}

+30 −11
Original line number Diff line number Diff line
@@ -129,6 +129,9 @@ public:

private:

    void driftForward(int64_t latenessNanos,
                      int64_t expectedNanosDelta,
                      int64_t framePosition);
    int32_t getLateTimeOffsetNanos() const;
    void update();

@@ -139,28 +142,44 @@ private:
        STATE_RUNNING
    };

    // Amount of time to drift forward when we get a late timestamp.
    static constexpr int32_t   kDriftNanos         =   1 * 1000;
    // Maximum amount of time to drift forward when we get a late timestamp.
    static constexpr int64_t   kMaxDriftNanos      = 10 * AAUDIO_NANOS_PER_MICROSECOND;
    // Safety margin to add to the late edge of the timestamp window.
    static constexpr int32_t   kExtraLatenessNanos = 100 * 1000;
    // Initial small threshold for causing a drift later in time.
    static constexpr int32_t   kInitialLatenessForDriftNanos = 10 * 1000;
    static constexpr int32_t   kExtraLatenessNanos = 100 * AAUDIO_NANOS_PER_MICROSECOND;
    // Predicted lateness due to scheduling jitter in the HAL timestamp collection.
    static constexpr int32_t   kLatenessMarginForSchedulingJitter
            = 1000 * AAUDIO_NANOS_PER_MICROSECOND;
    // Amount we multiply mLatenessForDriftNanos to get mLatenessForJumpNanos.
    // This determines when we go from thinking the clock is drifting to
    // when it has actually paused briefly.
    static constexpr int32_t   kScalerForJumpLateness = 5;
    // Amount to divide lateness past the expected burst window to generate
    // the drift value for the window. This is meant to be a very slight nudge forward.
    static constexpr int32_t   kShifterForDrift = 6; // divide by 2^N
    static constexpr int32_t   kVeryLateCountsNeededToTriggerJump = 2;

    static constexpr int32_t   kHistogramBinWidthMicros = 50;
    static constexpr int32_t   kHistogramBinCount       = 128;

    int64_t             mMarkerFramePosition{0}; // Estimated HW position.
    int64_t             mMarkerNanoTime{0};      // Estimated HW time.
    int32_t             mSampleRate{48000};
    int32_t             mFramesPerBurst{48};     // number of frames transferred at one time.
    int32_t             mBurstPeriodNanos{0};    // Time between HW bursts.
    int64_t             mBurstPeriodNanos{0};    // Time between HW bursts.
    // Includes mBurstPeriodNanos because we sample randomly over time.
    int32_t             mMaxMeasuredLatenessNanos{0};
    int64_t             mMaxMeasuredLatenessNanos{0};
    // Threshold for lateness that triggers a drift later in time.
    int32_t             mLatenessForDriftNanos;
    int64_t             mLatenessForDriftNanos{0}; // Set in update()
    // Based on the observed lateness when the DSP is paused for playing a touch sound.
    int64_t             mLatenessForJumpNanos{0}; // Set in update()
    int64_t             mLastJumpWarningTimeNanos{0}; // For throttling warnings.

    int32_t             mSampleRate{48000};
    int32_t             mFramesPerBurst{48};     // number of frames transferred at one time.
    int32_t             mConsecutiveVeryLateCount{0};  // To detect persistent DSP lateness.

    clock_model_state_t mState{STATE_STOPPED};   // State machine handles startup sequence.

    int32_t             mTimestampCount = 0;  // For logging.
    int32_t             mDspStallCount = 0;  // For logging.

    // distribution of timestamps relative to earliest
    std::unique_ptr<android::audio_utils::Histogram>   mHistogramMicros;
+63 −19
Original line number Diff line number Diff line
@@ -30,7 +30,8 @@ using namespace aaudio;
// We can use arbitrary values here because we are not opening a real audio stream.
#define SAMPLE_RATE             48000
#define HW_FRAMES_PER_BURST     48
#define NANOS_PER_BURST         (NANOS_PER_SECOND * HW_FRAMES_PER_BURST / SAMPLE_RATE)
// Sometimes we need a (double) value to avoid misguided Build warnings.
#define NANOS_PER_BURST         ((double) NANOS_PER_SECOND * HW_FRAMES_PER_BURST / SAMPLE_RATE)

class ClockModelTestFixture: public ::testing::Test {
public:
@@ -49,10 +50,20 @@ public:
        // cleanup any pending stuff, but no exceptions allowed
    }

    // Test processing of timestamps when the hardware may be slightly off from
    // the expected sample rate.
    void checkDriftingClock(double hardwareFramesPerSecond, int numLoops) {
    /** Test processing of timestamps when the hardware may be slightly off from
     * the expected sample rate.
     * @param hardwareFramesPerSecond  sample rate that may be slightly off
     * @param numLoops number of iterations
     * @param hardwarePauseTime  number of seconds to jump forward at halfway point
     */
    void checkDriftingClock(double hardwareFramesPerSecond,
                            int numLoops,
                            double hardwarePauseTime = 0.0) {
        int checksToSkip = 0;
        const int64_t startTimeNanos = 500000000; // arbitrary
        int64_t jumpOffsetNanos = 0;

        srand48(123456); // arbitrary seed for repeatable test results
        model.start(startTimeNanos);

        const int64_t startPositionFrames = HW_FRAMES_PER_BURST; // hardware
@@ -64,7 +75,7 @@ public:
        model.processTimestamp(startPositionFrames, markerTime);
        ASSERT_EQ(startPositionFrames, model.convertTimeToPosition(markerTime));

        double elapsedTimeSeconds = startTimeNanos / (double) NANOS_PER_SECOND;
        double elapsedTimeSeconds = 0.0;
        for (int i = 0; i < numLoops; i++) {
            // Calculate random delay over several bursts.
            const double timeDelaySeconds = 10.0 * drand48() * NANOS_PER_BURST / NANOS_PER_SECOND;
@@ -75,12 +86,37 @@ public:
            const int64_t currentTimeFrames = startPositionFrames +
                                        (int64_t)(hardwareFramesPerSecond * elapsedTimeSeconds);
            const int64_t numBursts = currentTimeFrames / HW_FRAMES_PER_BURST;
            const int64_t alignedPosition = startPositionFrames + (numBursts * HW_FRAMES_PER_BURST);
            const int64_t hardwarePosition = startPositionFrames
                    + (numBursts * HW_FRAMES_PER_BURST);

            // Apply drifting timestamp.
            model.processTimestamp(alignedPosition, currentTimeNanos);
            // Simulate a pause in the DSP where the position freezes for a length of time.
            if (i == numLoops / 2) {
                jumpOffsetNanos = (int64_t)(hardwarePauseTime * NANOS_PER_SECOND);
                checksToSkip = 5; // Give the model some time to catch up.
            }

            ASSERT_EQ(alignedPosition, model.convertTimeToPosition(currentTimeNanos));
            // Apply drifting timestamp. Add a random time to simulate the
            // random sampling of the clock that occurs when polling the DSP clock.
            int64_t sampledTimeNanos = (int64_t) (currentTimeNanos
                    + jumpOffsetNanos
                    + (drand48() * NANOS_PER_BURST));
            model.processTimestamp(hardwarePosition, sampledTimeNanos);

            if (checksToSkip > 0) {
                checksToSkip--;
            } else {
                // When the model is drifting it may be pushed forward or backward.
                const int64_t modelPosition = model.convertTimeToPosition(sampledTimeNanos);
                if (hardwareFramesPerSecond >= SAMPLE_RATE) { // fast hardware
                    ASSERT_LE(hardwarePosition - HW_FRAMES_PER_BURST, modelPosition);
                    ASSERT_GE(hardwarePosition + HW_FRAMES_PER_BURST, modelPosition);
                } else {
                    // Slow hardware. If this fails then the model may be drifting
                    // forward in time too slowly. Increase kDriftNanos.
                    ASSERT_LE(hardwarePosition, modelPosition);
                    ASSERT_GE(hardwarePosition + (2 * HW_FRAMES_PER_BURST), modelPosition);
                }
            }
        }
    }

@@ -144,23 +180,31 @@ TEST_F(ClockModelTestFixture, clock_timestamp) {
    EXPECT_EQ(position, model.convertTimeToPosition(markerTime + (73 * NANOS_PER_MICROSECOND)));

    // convertPositionToTime rounds up
    EXPECT_EQ(markerTime + NANOS_PER_BURST, model.convertPositionToTime(position + 17));
    EXPECT_EQ(markerTime + (int64_t)NANOS_PER_BURST, model.convertPositionToTime(position + 17));
}

#define NUM_LOOPS_DRIFT   10000
#define NUM_LOOPS_DRIFT   200000

// test nudging the window by using a drifting HW clock
TEST_F(ClockModelTestFixture, clock_no_drift) {
    checkDriftingClock(SAMPLE_RATE, NUM_LOOPS_DRIFT);
}

// These slow drift rates caused errors when I disabled the code that handles
// drifting in the clock model. So I think the test is valid.
// Test drifting hardware clocks.
// It is unlikely that real hardware would be off by more than this amount.

// Test a slow clock. This will cause the times to be later than expected.
// This will push the clock model window forward and cause it to drift.
TEST_F(ClockModelTestFixture, clock_slow_drift) {
    checkDriftingClock(0.998 * SAMPLE_RATE, NUM_LOOPS_DRIFT);
    checkDriftingClock(0.99998 * SAMPLE_RATE, NUM_LOOPS_DRIFT);
}

// Test a fast hardware clock. This will cause the times to be earlier
// than expected. This will cause the clock model to jump backwards quickly.
TEST_F(ClockModelTestFixture, clock_fast_drift) {
    checkDriftingClock(1.002 * SAMPLE_RATE, NUM_LOOPS_DRIFT);
    checkDriftingClock(1.00002 * SAMPLE_RATE, NUM_LOOPS_DRIFT);
}

// Simulate a pause in the DSP, which can occur if the DSP reroutes the audio.
TEST_F(ClockModelTestFixture, clock_jump_forward_500) {
    checkDriftingClock(SAMPLE_RATE, NUM_LOOPS_DRIFT, 0.500);
}