
Commit 67ae6bc8 authored by Narayan Kamath

Simplify the implementation of AudioPlaybackHandler.

Now uses a queue of runnables, one per synthesis. Also
introduces an abstraction that wraps AudioTrack and implements
the blocking semantics that we desire.

bug:5680699

Change-Id: I34a1248ff05766a7d9b3002055fb5b24aa9f230b
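As context for the design, here is a minimal sketch of the queue-of-runnables pattern this change introduces: a dedicated playback thread drains a blocking queue of items, and each item's run() blocks until its audio has finished. This is an illustration only, not the AOSP implementation; all names in it are hypothetical.

import java.util.concurrent.LinkedBlockingQueue;

// Hypothetical stand-in for PlaybackQueueItem: one runnable per synthesis.
abstract class QueueItemSketch implements Runnable {
    // Interrupts this item's playback; may be called from any thread.
    abstract void stop(boolean isError);
}

class PlaybackLoopSketch {
    private final LinkedBlockingQueue<QueueItemSketch> mQueue =
            new LinkedBlockingQueue<QueueItemSketch>();

    void enqueue(QueueItemSketch item) {
        mQueue.add(item);
    }

    // Runs on a single dedicated playback thread. Because each run()
    // blocks until that item's audio completes, items play in order.
    void loop() throws InterruptedException {
        for (;;) {
            mQueue.take().run();
        }
    }
}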
parent 51340e0f
+48 −541

Preview size limit exceeded, changes collapsed.

+11 −8
@@ -16,23 +16,26 @@
 package android.speech.tts;
 
 import android.speech.tts.TextToSpeechService.UtteranceProgressDispatcher;
 import android.util.Log;
 
-class AudioMessageParams extends MessageParams {
+class AudioPlaybackQueueItem extends PlaybackQueueItem {
     private final BlockingMediaPlayer mPlayer;
 
-    AudioMessageParams(UtteranceProgressDispatcher dispatcher,
+    AudioPlaybackQueueItem(UtteranceProgressDispatcher dispatcher,
             Object callerIdentity, BlockingMediaPlayer player) {
         super(dispatcher, callerIdentity);
         mPlayer = player;
     }
 
-    BlockingMediaPlayer getPlayer() {
-        return mPlayer;
+    @Override
+    public void run() {
+        getDispatcher().dispatchOnStart();
+        // TODO: This can be avoided. Will be fixed later in this CL.
+        mPlayer.startAndWait();
+        getDispatcher().dispatchOnDone();
     }
 
     @Override
-    int getType() {
-        return TYPE_AUDIO;
+    void stop(boolean isError) {
+        mPlayer.stop();
     }
 
 }
+338 −0
// Copyright 2011 Google Inc. All Rights Reserved.

package android.speech.tts;

import android.media.AudioFormat;
import android.media.AudioTrack;
import android.util.Log;

/**
 * Exposes parts of the {@link AudioTrack} API by delegating calls to an
 * underlying {@link AudioTrack}. Additionally, provides methods like
 * {@link #waitAndRelease()} that will block until all audio track
 * data has been flushed to the mixer and playback is estimated to
 * have completed.
 */
class BlockingAudioTrack {
    private static final String TAG = "TTS.BlockingAudioTrack";
    private static final boolean DBG = false;


    /**
     * The minimum increment of time to wait for an AudioTrack to finish
     * playing.
     */
    private static final long MIN_SLEEP_TIME_MS = 20;

    /**
     * The maximum increment of time to sleep while waiting for an AudioTrack
     * to finish playing.
     */
    private static final long MAX_SLEEP_TIME_MS = 2500;

    /**
     * The maximum amount of time to wait for an audio track to make progress while
     * it remains in PLAYSTATE_PLAYING. This should never happen in normal usage, but
     * could happen in exceptional circumstances like a media_server crash.
     */
    private static final long MAX_PROGRESS_WAIT_MS = MAX_SLEEP_TIME_MS;

    /**
     * Minimum size of the buffer of the underlying {@link android.media.AudioTrack}
     * we create.
     */
    private static final int MIN_AUDIO_BUFFER_SIZE = 8192;


    private final int mStreamType;
    private final int mSampleRateInHz;
    private final int mAudioFormat;
    private final int mChannelCount;
    private final float mVolume;
    private final float mPan;

    private final int mBytesPerFrame;
    /**
     * A "short utterance" is one that uses less bytes than the audio
     * track buffer size (mAudioBufferSize). In this case, we need to call
     * {@link AudioTrack#stop()} to send pending buffers to the mixer, and slightly
     * different logic is required to wait for the track to finish.
     *
     * Not volatile, accessed only from the audio playback thread.
     */
    private boolean mIsShortUtterance;
    /**
     * Will be valid after a call to {@link #init()}.
     */
    private int mAudioBufferSize;
    private int mBytesWritten = 0;

    private AudioTrack mAudioTrack;
    private volatile boolean mStopped;
    // Locks the initialization / uninitialization of the audio track.
    // This is required because stop() will throw an illegal state exception
    // if called before init() or after mAudioTrack.release().
    private final Object mAudioTrackLock = new Object();

    BlockingAudioTrack(int streamType, int sampleRate,
            int audioFormat, int channelCount,
            float volume, float pan) {
        mStreamType = streamType;
        mSampleRateInHz = sampleRate;
        mAudioFormat = audioFormat;
        mChannelCount = channelCount;
        mVolume = volume;
        mPan = pan;

        mBytesPerFrame = getBytesPerFrame(mAudioFormat) * mChannelCount;
        mIsShortUtterance = false;
        mAudioBufferSize = 0;
        mBytesWritten = 0;

        mAudioTrack = null;
        mStopped = false;
    }

    public void init() {
        AudioTrack track = createStreamingAudioTrack();

        synchronized (mAudioTrackLock) {
            mAudioTrack = track;
        }
    }

    public void stop() {
        synchronized (mAudioTrackLock) {
            if (mAudioTrack != null) {
                mAudioTrack.stop();
            }
        }
        mStopped = true;
    }

    public int write(byte[] data) {
        if (mAudioTrack == null || mStopped) {
            return -1;
        }
        final int bytesWritten = writeToAudioTrack(mAudioTrack, data);
        mBytesWritten += bytesWritten;
        return bytesWritten;
    }

    public void waitAndRelease() {
        // For "small" audio tracks, we have to stop() them to make them mixable,
        // else the audio subsystem will wait indefinitely for us to fill the buffer
        // before rendering the track mixable.
        //
        // If mStopped is true, the track has already been stopped, so there
        // is no point in stopping it again.
        if (mBytesWritten < mAudioBufferSize && !mStopped) {
            if (DBG) {
                Log.d(TAG, "Stopping audio track to flush audio, state was : " +
                        mAudioTrack.getPlayState() + ",stopped= " + mStopped);
            }

            mIsShortUtterance = true;
            mAudioTrack.stop();
        }

        // Block until the audio track is done only if we haven't stopped yet.
        if (!mStopped) {
            if (DBG) Log.d(TAG, "Waiting for audio track to complete : " + mAudioTrack.hashCode());
            blockUntilDone(mAudioTrack);
        }

        // The last call to AudioTrack.write() will return only after
        // all data from the audio track has been sent to the mixer, so
        // it's safe to release at this point.
        if (DBG) Log.d(TAG, "Releasing audio track [" + mAudioTrack.hashCode() + "]");
        synchronized (mAudioTrackLock) {
            mAudioTrack.release();
            mAudioTrack = null;
        }
    }


    static int getChannelConfig(int channelCount) {
        if (channelCount == 1) {
            return AudioFormat.CHANNEL_OUT_MONO;
        } else if (channelCount == 2){
            return AudioFormat.CHANNEL_OUT_STEREO;
        }

        return 0;
    }

    long getAudioLengthMs(int numBytes) {
        final int unconsumedFrames = numBytes / mBytesPerFrame;
        final long estimatedTimeMs = unconsumedFrames * 1000 / mSampleRateInHz;

        return estimatedTimeMs;
    }

    private static int writeToAudioTrack(AudioTrack audioTrack, byte[] bytes) {
        if (audioTrack.getPlayState() != AudioTrack.PLAYSTATE_PLAYING) {
            if (DBG) Log.d(TAG, "AudioTrack not playing, restarting : " + audioTrack.hashCode());
            audioTrack.play();
        }

        int count = 0;
        while (count < bytes.length) {
            // Note that we don't take a buffer offset into account here
            // because it is guaranteed to be 0.
            int written = audioTrack.write(bytes, count, bytes.length);
            if (written <= 0) {
                break;
            }
            count += written;
        }
        return count;
    }

    private AudioTrack createStreamingAudioTrack() {
        final int channelConfig = getChannelConfig(mChannelCount);

        int minBufferSizeInBytes
                = AudioTrack.getMinBufferSize(mSampleRateInHz, channelConfig, mAudioFormat);
        int bufferSizeInBytes = Math.max(MIN_AUDIO_BUFFER_SIZE, minBufferSizeInBytes);

        AudioTrack audioTrack = new AudioTrack(mStreamType, mSampleRateInHz, channelConfig,
                mAudioFormat, bufferSizeInBytes, AudioTrack.MODE_STREAM);
        if (audioTrack.getState() != AudioTrack.STATE_INITIALIZED) {
            Log.w(TAG, "Unable to create audio track.");
            audioTrack.release();
            return null;
        }

        mAudioBufferSize = bufferSizeInBytes;

        setupVolume(audioTrack, mVolume, mPan);
        return audioTrack;
    }

    private static int getBytesPerFrame(int audioFormat) {
        if (audioFormat == AudioFormat.ENCODING_PCM_8BIT) {
            return 1;
        } else if (audioFormat == AudioFormat.ENCODING_PCM_16BIT) {
            return 2;
        }

        return -1;
    }


    private void blockUntilDone(AudioTrack audioTrack) {
        if (mBytesWritten <= 0) {
            return;
        }

        if (mIsShortUtterance) {
            // In this case we would have called AudioTrack#stop() to flush
            // buffers to the mixer. This makes the playback head position
            // unobservable and notification markers do not work reliably. We
            // have no option but to wait until we think the track would finish
            // playing and release it after.
            //
            // This isn't as bad as it looks because (a) We won't end up waiting
            // for much longer than we should because even at 4khz mono, a short
            // utterance weighs in at about 2 seconds, and (b) such short utterances
            // are expected to be relatively infrequent and in a stream of utterances
            // this shows up as a slightly longer pause.
            blockUntilEstimatedCompletion();
        } else {
            blockUntilCompletion(audioTrack);
        }
    }

    private void blockUntilEstimatedCompletion() {
        final int lengthInFrames = mBytesWritten / mBytesPerFrame;
        final long estimatedTimeMs = (lengthInFrames * 1000 / mSampleRateInHz);

        if (DBG) Log.d(TAG, "About to sleep for: " + estimatedTimeMs + "ms for a short utterance");

        try {
            Thread.sleep(estimatedTimeMs);
        } catch (InterruptedException ie) {
            // Do nothing.
        }
    }

    private void blockUntilCompletion(AudioTrack audioTrack) {
        final int lengthInFrames = mBytesWritten / mBytesPerFrame;

        int previousPosition = -1;
        int currentPosition = 0;
        long blockedTimeMs = 0;

        while ((currentPosition = audioTrack.getPlaybackHeadPosition()) < lengthInFrames &&
                audioTrack.getPlayState() == AudioTrack.PLAYSTATE_PLAYING && !mStopped) {

            final long estimatedTimeMs = ((lengthInFrames - currentPosition) * 1000) /
                    audioTrack.getSampleRate();
            final long sleepTimeMs = clip(estimatedTimeMs, MIN_SLEEP_TIME_MS, MAX_SLEEP_TIME_MS);

            // Check if the audio track has made progress since the last loop
            // iteration. We should then add in the amount of time that was
            // spent sleeping in the last iteration.
            if (currentPosition == previousPosition) {
                // This works only because the sleep time that would have been calculated
                // would be the same in the previous iteration too.
                blockedTimeMs += sleepTimeMs;
                // If we've taken too long to make progress, bail.
                if (blockedTimeMs > MAX_PROGRESS_WAIT_MS) {
                    Log.w(TAG, "Waited unsuccessfully for " + MAX_PROGRESS_WAIT_MS + "ms " +
                            "for AudioTrack to make progress, Aborting");
                    break;
                }
            } else {
                blockedTimeMs = 0;
            }
            previousPosition = currentPosition;

            if (DBG) {
                Log.d(TAG, "About to sleep for : " + sleepTimeMs + " ms," +
                        " Playback position : " + currentPosition + ", Length in frames : "
                        + lengthInFrames);
            }
            try {
                Thread.sleep(sleepTimeMs);
            } catch (InterruptedException ie) {
                break;
            }
        }
    }

    private static void setupVolume(AudioTrack audioTrack, float volume, float pan) {
        final float vol = clip(volume, 0.0f, 1.0f);
        final float panning = clip(pan, -1.0f, 1.0f);

        float volLeft = vol;
        float volRight = vol;
        if (panning > 0.0f) {
            volLeft *= (1.0f - panning);
        } else if (panning < 0.0f) {
            volRight *= (1.0f + panning);
        }
        if (DBG) Log.d(TAG, "volLeft=" + volLeft + ",volRight=" + volRight);
        if (audioTrack.setStereoVolume(volLeft, volRight) != AudioTrack.SUCCESS) {
            Log.e(TAG, "Failed to set volume");
        }
    }

    private static final long clip(long value, long min, long max) {
        if (value < min) {
            return min;
        }

        if (value > max) {
            return max;
        }

        return value;
    }

    private static float clip(float value, float min, float max) {
        return value > max ? max : (value < min ? min : value);
    }

}
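
To make the blocking semantics concrete, here is a hedged usage sketch. The calling sequence is inferred from the methods above, not taken from this commit, and synthesizedBuffers is a hypothetical source of PCM chunks.

BlockingAudioTrack track = new BlockingAudioTrack(
        AudioManager.STREAM_MUSIC,       // stream type
        16000,                           // sample rate in Hz
        AudioFormat.ENCODING_PCM_16BIT,  // audio format
        1,                               // channel count (mono)
        1.0f,                            // volume
        0.0f);                           // centered pan
track.init();
for (byte[] chunk : synthesizedBuffers) {  // hypothetical PCM source
    if (track.write(chunk) < 0) {
        break;  // stopped, or the underlying AudioTrack failed to initialize
    }
}
// Blocks until the written audio is estimated to have finished playing,
// then releases the underlying AudioTrack.
track.waitAndRelease();

As a worked example of the timing math: at 16 kHz mono PCM-16 a frame is 2 bytes, so getAudioLengthMs(32000) estimates 16000 frames * 1000 / 16000 Hz = 1000 ms of audio.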
+0 −1
@@ -54,7 +54,6 @@ class BlockingMediaPlayer {
         mUri = uri;
         mStreamType = streamType;
         mDone = new ConditionVariable();
-
     }
 
     /**
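
Only the ConditionVariable assignment is visible in this hunk, so the following is an assumption-laden sketch of the general blocking pattern BlockingMediaPlayer appears to use (a MediaPlayer completion callback that opens an android.os.ConditionVariable), not the class's actual code.

// Sketch only: assumes 'player' is a prepared android.media.MediaPlayer
// and 'done' is the ConditionVariable created above.
private void startAndWaitSketch(MediaPlayer player, final ConditionVariable done) {
    player.setOnCompletionListener(new MediaPlayer.OnCompletionListener() {
        @Override
        public void onCompletion(MediaPlayer mp) {
            done.open();  // wake the thread blocked in block()
        }
    });
    player.start();
    done.block();  // returns when playback completes (or stop() opens it)
}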
+6 −4
@@ -17,6 +17,7 @@ package android.speech.tts;

 import android.os.SystemClock;
 import android.text.TextUtils;
+import android.util.Log;
 
 /**
  * Writes data about a given speech synthesis request to the event logs.
@@ -24,7 +25,7 @@ import android.text.TextUtils;
  * speech rate / pitch and the latency and overall time taken.
  *
  * Note that {@link EventLogger#onStopped()} and {@link EventLogger#onError()}
- * might be called from any thread, but on {@link EventLogger#onPlaybackStart()} and
+ * might be called from any thread, but on {@link EventLogger#onAudioDataWritten()} and
  * {@link EventLogger#onComplete()} must be called from a single thread
  * (usually the audio playback thread}
  */
@@ -81,10 +82,10 @@ class EventLogger {
     /**
      * Notifies the logger that audio playback has started for some section
      * of the synthesis. This is normally some amount of time after the engine
-     * has synthesized data and varides depending on utterances and
+     * has synthesized data and varies depending on utterances and
      * other audio currently in the queue.
      */
-    public void onPlaybackStart() {
+    public void onAudioDataWritten() {
         // For now, keep track of only the first chunk of audio
         // that was played.
         if (mPlaybackStartTime == -1) {
@@ -120,7 +121,7 @@ class EventLogger {
         }
 
         long completionTime = SystemClock.elapsedRealtime();
-        // onPlaybackStart() should normally always be called if an
+        // onAudioDataWritten() should normally always be called if an
         // error does not occur.
         if (mError || mPlaybackStartTime == -1 || mEngineCompleteTime == -1) {
             EventLogTags.writeTtsSpeakFailure(mServiceApp, mCallerUid, mCallerPid,
@@ -139,6 +140,7 @@ class EventLogger {
         final long audioLatency = mPlaybackStartTime - mReceivedTime;
         final long engineLatency = mEngineStartTime - mRequestProcessingStartTime;
+        final long engineTotal = mEngineCompleteTime - mRequestProcessingStartTime;
 
         EventLogTags.writeTtsSpeakSuccess(mServiceApp, mCallerUid, mCallerPid,
                 getUtteranceLength(), getLocaleString(),
                 mRequest.getSpeechRate(), mRequest.getPitch(),
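
As a worked example of the latency bookkeeping above, using hypothetical SystemClock.elapsedRealtime() values in milliseconds:

// mReceivedTime               = 1000  (request received by the service)
// mRequestProcessingStartTime = 1010  (request handed to the engine)
// mEngineStartTime            = 1050  (first audio produced by the engine)
// mPlaybackStartTime          = 1120  (first audio written for playback)
// mEngineCompleteTime         = 1300  (engine finished synthesizing)
//
// audioLatency  = 1120 - 1000 = 120 ms  (request to audible output)
// engineLatency = 1050 - 1010 =  40 ms  (engine start to first data)
// engineTotal   = 1300 - 1010 = 290 ms  (engine start to all data)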