Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 6075e3df authored by Narayan Kamath's avatar Narayan Kamath Committed by Android (Google) Code Review
Browse files

Merge "Simplify the implementation of AudioPlaybackHandler."

parents b5119aa7 67ae6bc8
Loading
Loading
Loading
Loading
+48 −541

File changed.

Preview size limit exceeded, changes collapsed.

+11 −8
Original line number Diff line number Diff line
@@ -16,23 +16,26 @@
package android.speech.tts;

import android.speech.tts.TextToSpeechService.UtteranceProgressDispatcher;
import android.util.Log;

class AudioMessageParams extends MessageParams {
class AudioPlaybackQueueItem extends PlaybackQueueItem {
    private final BlockingMediaPlayer mPlayer;

    AudioMessageParams(UtteranceProgressDispatcher dispatcher,
    AudioPlaybackQueueItem(UtteranceProgressDispatcher dispatcher,
            Object callerIdentity, BlockingMediaPlayer player) {
        super(dispatcher, callerIdentity);
        mPlayer = player;
    }

    BlockingMediaPlayer getPlayer() {
        return mPlayer;
    @Override
    public void run() {
        getDispatcher().dispatchOnStart();
        // TODO: This can be avoided. Will be fixed later in this CL.
        mPlayer.startAndWait();
        getDispatcher().dispatchOnDone();
    }

    @Override
    int getType() {
        return TYPE_AUDIO;
    void stop(boolean isError) {
        mPlayer.stop();
    }

}
+338 −0
Original line number Diff line number Diff line
// Copyright 2011 Google Inc. All Rights Reserved.

package android.speech.tts;

import android.media.AudioFormat;
import android.media.AudioTrack;
import android.util.Log;

/**
 * Exposes parts of the {@link AudioTrack} API by delegating calls to an
 * underlying {@link AudioTrack}. Additionally, provides methods like
 * {@link #waitAndRelease()} that will block until all audiotrack
 * data has been flushed to the mixer, and is estimated to have completed
 * playback.
 */
class BlockingAudioTrack {
    private static final String TAG = "TTS.BlockingAudioTrack";
    private static final boolean DBG = false;


    /**
     * The minimum increment of time to wait for an AudioTrack to finish
     * playing.
     */
    private static final long MIN_SLEEP_TIME_MS = 20;

    /**
     * The maximum increment of time to sleep while waiting for an AudioTrack
     * to finish playing.
     */
    private static final long MAX_SLEEP_TIME_MS = 2500;

    /**
     * The maximum amount of time to wait for an audio track to make progress while
     * it remains in PLAYSTATE_PLAYING. This should never happen in normal usage, but
     * could happen in exceptional circumstances like a media_server crash.
     */
    private static final long MAX_PROGRESS_WAIT_MS = MAX_SLEEP_TIME_MS;

    /**
     * Minimum size of the buffer of the underlying {@link android.media.AudioTrack}
     * we create.
     */
    private static final int MIN_AUDIO_BUFFER_SIZE = 8192;


    private final int mStreamType;
    private final int mSampleRateInHz;
    private final int mAudioFormat;
    private final int mChannelCount;
    private final float mVolume;
    private final float mPan;

    private final int mBytesPerFrame;
    /**
     * A "short utterance" is one that uses fewer bytes than the audio
     * track buffer size (mAudioBufferSize). In this case, we need to call
     * {@link AudioTrack#stop()} to send pending buffers to the mixer, and slightly
     * different logic is required to wait for the track to finish.
     *
     * Not volatile, accessed only from the audio playback thread.
     */
    private boolean mIsShortUtterance;
    /**
     * Will be valid after a call to {@link #init()}.
     */
    private int mAudioBufferSize;
    private int mBytesWritten = 0;

    private AudioTrack mAudioTrack;
    private volatile boolean mStopped;
    // Locks the initialization / uninitialization of the audio track.
    // This is required because stop() will throw an illegal state exception
    // if called before init() or after mAudioTrack.release().
    private final Object mAudioTrackLock = new Object();

    /**
     * Records the playback parameters. No {@link AudioTrack} is created until
     * {@link #init()} is called.
     *
     * @param streamType the stream type handed to the {@link AudioTrack} constructor.
     * @param sampleRate the sample rate, in Hz.
     * @param audioFormat one of the {@link AudioFormat} PCM encodings.
     * @param channelCount the number of channels (1 or 2, see
     *         {@link #getChannelConfig(int)}).
     * @param volume the volume, clipped to [0.0, 1.0] when applied.
     * @param pan the panning, clipped to [-1.0, 1.0] when applied.
     */
    BlockingAudioTrack(int streamType, int sampleRate,
            int audioFormat, int channelCount,
            float volume, float pan) {
        mStreamType = streamType;
        mSampleRateInHz = sampleRate;
        mAudioFormat = audioFormat;
        mChannelCount = channelCount;
        mVolume = volume;
        mPan = pan;

        mBytesPerFrame = getBytesPerFrame(mAudioFormat) * mChannelCount;
        mIsShortUtterance = false;
        mAudioBufferSize = 0;
        mBytesWritten = 0;

        mAudioTrack = null;
        mStopped = false;
    }

    /**
     * Creates the underlying streaming {@link AudioTrack} and publishes it under
     * the track lock. If the track could not be created it stays {@code null};
     * {@link #write(byte[])} will then return -1 and {@link #waitAndRelease()}
     * returns immediately.
     */
    public void init() {
        AudioTrack track = createStreamingAudioTrack();

        synchronized (mAudioTrackLock) {
            mAudioTrack = track;
        }
    }

    /**
     * Stops playback. May be called before {@link #init()} or after
     * {@link #waitAndRelease()}, in which case only the stopped flag is set.
     */
    public void stop() {
        synchronized (mAudioTrackLock) {
            // Raise the flag before touching the track so that a concurrent
            // write() that observes the stopped track also observes mStopped
            // and does not restart playback.
            mStopped = true;
            if (mAudioTrack != null) {
                mAudioTrack.stop();
            }
        }
    }

    /**
     * Writes the whole of {@code data} to the audio track, blocking as needed.
     *
     * @param data the audio bytes to play.
     * @return the number of bytes actually written, or -1 if there is no audio
     *         track or {@link #stop()} has been called.
     */
    public int write(byte[] data) {
        final AudioTrack track = mAudioTrack;
        if (track == null || mStopped) {
            return -1;
        }
        final int bytesWritten = writeToAudioTrack(track, data);
        mBytesWritten += bytesWritten;
        return bytesWritten;
    }

    /**
     * Blocks until the written audio is estimated to have finished playing
     * (unless {@link #stop()} was called), then releases the audio track.
     * Does nothing if there is no audio track, e.g. because {@link #init()}
     * failed to create one or because the track was already released.
     */
    public void waitAndRelease() {
        final AudioTrack track;
        synchronized (mAudioTrackLock) {
            track = mAudioTrack;
        }
        if (track == null) {
            if (DBG) Log.d(TAG, "Audio track null [init() failed or duplicate call ?]");
            return;
        }

        // For "small" audio tracks, we have to stop() them to make them mixable,
        // else the audio subsystem will wait indefinitely for us to fill the buffer
        // before rendering the track mixable.
        //
        // If mStopped is true, the track would already have been stopped, so not
        // much point in doing that again.
        if (mBytesWritten < mAudioBufferSize && !mStopped) {
            if (DBG) {
                Log.d(TAG, "Stopping audio track to flush audio, state was : " +
                        track.getPlayState() + ",stopped= " + mStopped);
            }

            mIsShortUtterance = true;
            track.stop();
        }

        // Block until the audio track is done only if we haven't stopped yet.
        if (!mStopped) {
            if (DBG) Log.d(TAG, "Waiting for audio track to complete : " + track.hashCode());
            blockUntilDone(track);
        }

        // The last call to AudioTrack.write( ) will return only after
        // all data from the audioTrack has been sent to the mixer, so
        // it's safe to release at this point.
        if (DBG) Log.d(TAG, "Releasing audio track [" + track.hashCode() + "]");
        synchronized (mAudioTrackLock) {
            track.release();
            mAudioTrack = null;
        }
    }


    /**
     * Maps a channel count to an {@link AudioFormat} output channel mask.
     *
     * @return {@link AudioFormat#CHANNEL_OUT_MONO} for 1,
     *         {@link AudioFormat#CHANNEL_OUT_STEREO} for 2, and 0 otherwise.
     */
    static int getChannelConfig(int channelCount) {
        if (channelCount == 1) {
            return AudioFormat.CHANNEL_OUT_MONO;
        } else if (channelCount == 2){
            return AudioFormat.CHANNEL_OUT_STEREO;
        }

        return 0;
    }

    /**
     * Estimates how long {@code numBytes} of audio takes to play with this
     * track's frame size and sample rate.
     *
     * @param numBytes the number of audio bytes.
     * @return the estimated duration in milliseconds.
     */
    long getAudioLengthMs(int numBytes) {
        // Widen to long before multiplying: frames * 1000 overflows an int
        // once the frame count exceeds roughly 2.1 million.
        final long unconsumedFrames = numBytes / mBytesPerFrame;
        final long estimatedTimeMs = unconsumedFrames * 1000 / mSampleRateInHz;

        return estimatedTimeMs;
    }

    /**
     * Starts the track if it is not playing and writes {@code bytes} to it,
     * looping until everything has been written or a write makes no progress.
     *
     * @return the number of bytes written, which may be less than
     *         {@code bytes.length} if a write returned zero or an error code.
     */
    private static int writeToAudioTrack(AudioTrack audioTrack, byte[] bytes) {
        if (audioTrack.getPlayState() != AudioTrack.PLAYSTATE_PLAYING) {
            if (DBG) Log.d(TAG, "AudioTrack not playing, restarting : " + audioTrack.hashCode());
            audioTrack.play();
        }

        int count = 0;
        while (count < bytes.length) {
            // The third argument is the number of bytes to write starting at
            // the offset, so only the remainder may be requested; asking for
            // bytes.length again after a partial write would run past the end
            // of the array and be rejected.
            int written = audioTrack.write(bytes, count, bytes.length - count);
            if (written <= 0) {
                break;
            }
            count += written;
        }
        return count;
    }

    /**
     * Creates a {@link AudioTrack#MODE_STREAM} track whose buffer is at least
     * {@link #MIN_AUDIO_BUFFER_SIZE} bytes, records the chosen buffer size in
     * {@code mAudioBufferSize} and applies the configured volume and pan.
     *
     * @return the new track, or {@code null} if it failed to initialize.
     */
    private AudioTrack createStreamingAudioTrack() {
        final int channelConfig = getChannelConfig(mChannelCount);

        int minBufferSizeInBytes
                = AudioTrack.getMinBufferSize(mSampleRateInHz, channelConfig, mAudioFormat);
        int bufferSizeInBytes = Math.max(MIN_AUDIO_BUFFER_SIZE, minBufferSizeInBytes);

        AudioTrack audioTrack = new AudioTrack(mStreamType, mSampleRateInHz, channelConfig,
                mAudioFormat, bufferSizeInBytes, AudioTrack.MODE_STREAM);
        if (audioTrack.getState() != AudioTrack.STATE_INITIALIZED) {
            Log.w(TAG, "Unable to create audio track.");
            audioTrack.release();
            return null;
        }

        mAudioBufferSize = bufferSizeInBytes;

        setupVolume(audioTrack, mVolume, mPan);
        return audioTrack;
    }

    /**
     * Returns the size in bytes of a single-channel sample for the given
     * encoding: 1 for 8 bit PCM, 2 for 16 bit PCM and -1 for anything else.
     */
    private static int getBytesPerFrame(int audioFormat) {
        if (audioFormat == AudioFormat.ENCODING_PCM_8BIT) {
            return 1;
        } else if (audioFormat == AudioFormat.ENCODING_PCM_16BIT) {
            return 2;
        }

        return -1;
    }


    /**
     * Blocks until the audio written so far is estimated to have been played.
     * Returns immediately if nothing was written.
     */
    private void blockUntilDone(AudioTrack audioTrack) {
        if (mBytesWritten <= 0) {
            return;
        }

        if (mIsShortUtterance) {
            // In this case we would have called AudioTrack#stop() to flush
            // buffers to the mixer. This makes the playback head position
            // unobservable and notification markers do not work reliably. We
            // have no option but to wait until we think the track would finish
            // playing and release it after.
            //
            // This isn't as bad as it looks because (a) We won't end up waiting
            // for much longer than we should because even at 4khz mono, a short
            // utterance weighs in at about 2 seconds, and (b) such short utterances
            // are expected to be relatively infrequent and in a stream of utterances
            // this shows up as a slightly longer pause.
            blockUntilEstimatedCompletion();
        } else {
            blockUntilCompletion(audioTrack);
        }
    }

    /**
     * Sleeps for the estimated play time of all bytes written so far. An
     * interrupt ends the sleep early and is otherwise ignored.
     */
    private void blockUntilEstimatedCompletion() {
        // long arithmetic, so that frames * 1000 cannot overflow.
        final long lengthInFrames = mBytesWritten / mBytesPerFrame;
        final long estimatedTimeMs = (lengthInFrames * 1000 / mSampleRateInHz);

        if (DBG) Log.d(TAG, "About to sleep for: " + estimatedTimeMs + "ms for a short utterance");

        try {
            Thread.sleep(estimatedTimeMs);
        } catch (InterruptedException ie) {
            // Do nothing.
        }
    }

    /**
     * Polls the playback head position until it reaches the number of frames
     * written, the track leaves PLAYSTATE_PLAYING, {@link #stop()} is called,
     * the thread is interrupted, or no progress has been observed for more
     * than {@link #MAX_PROGRESS_WAIT_MS}.
     */
    private void blockUntilCompletion(AudioTrack audioTrack) {
        final int lengthInFrames = mBytesWritten / mBytesPerFrame;

        int previousPosition = -1;
        int currentPosition = 0;
        long blockedTimeMs = 0;

        while ((currentPosition = audioTrack.getPlaybackHeadPosition()) < lengthInFrames &&
                audioTrack.getPlayState() == AudioTrack.PLAYSTATE_PLAYING && !mStopped) {

            // Widen before multiplying: with more than ~2.1 million frames
            // remaining the int product overflows to a negative value, which
            // clip() would turn into the minimum sleep time.
            final long remainingFrames = (long) lengthInFrames - currentPosition;
            final long estimatedTimeMs = (remainingFrames * 1000) /
                    audioTrack.getSampleRate();
            final long sleepTimeMs = clip(estimatedTimeMs, MIN_SLEEP_TIME_MS, MAX_SLEEP_TIME_MS);

            // Check if the audio track has made progress since the last loop
            // iteration. We should then add in the amount of time that was
            // spent sleeping in the last iteration.
            if (currentPosition == previousPosition) {
                // This works only because the sleep time that would have been calculated
                // would be the same in the previous iteration too.
                blockedTimeMs += sleepTimeMs;
                // If we've taken too long to make progress, bail.
                if (blockedTimeMs > MAX_PROGRESS_WAIT_MS) {
                    Log.w(TAG, "Waited unsuccessfully for " + MAX_PROGRESS_WAIT_MS + "ms " +
                            "for AudioTrack to make progress, Aborting");
                    break;
                }
            } else {
                blockedTimeMs = 0;
            }
            previousPosition = currentPosition;

            if (DBG) {
                Log.d(TAG, "About to sleep for : " + sleepTimeMs + " ms," +
                        " Playback position : " + currentPosition + ", Length in frames : "
                        + lengthInFrames);
            }
            try {
                Thread.sleep(sleepTimeMs);
            } catch (InterruptedException ie) {
                break;
            }
        }
    }

    /**
     * Applies volume and panning to the track as a left / right stereo volume
     * pair. A positive pan attenuates the left channel, a negative pan the
     * right channel. Logs an error if the volume could not be set.
     */
    private static void setupVolume(AudioTrack audioTrack, float volume, float pan) {
        final float vol = clip(volume, 0.0f, 1.0f);
        final float panning = clip(pan, -1.0f, 1.0f);

        float volLeft = vol;
        float volRight = vol;
        if (panning > 0.0f) {
            volLeft *= (1.0f - panning);
        } else if (panning < 0.0f) {
            volRight *= (1.0f + panning);
        }
        if (DBG) Log.d(TAG, "volLeft=" + volLeft + ",volRight=" + volRight);
        if (audioTrack.setStereoVolume(volLeft, volRight) != AudioTrack.SUCCESS) {
            Log.e(TAG, "Failed to set volume");
        }
    }

    /** Clamps {@code value} to the range [{@code min}, {@code max}]. */
    private static long clip(long value, long min, long max) {
        if (value < min) {
            return min;
        }

        if (value > max) {
            return max;
        }

        return value;
    }

    /** Clamps {@code value} to the range [{@code min}, {@code max}]. */
    private static float clip(float value, float min, float max) {
        return value > max ? max : (value < min ? min : value);
    }

}
+0 −1
Original line number Diff line number Diff line
@@ -54,7 +54,6 @@ class BlockingMediaPlayer {
        mUri = uri;
        mStreamType = streamType;
        mDone = new ConditionVariable();

    }

    /**
+6 −4
Original line number Diff line number Diff line
@@ -17,6 +17,7 @@ package android.speech.tts;

import android.os.SystemClock;
import android.text.TextUtils;
import android.util.Log;

/**
 * Writes data about a given speech synthesis request to the event logs.
@@ -24,7 +25,7 @@ import android.text.TextUtils;
 * speech rate / pitch and the latency and overall time taken.
 *
 * Note that {@link EventLogger#onStopped()} and {@link EventLogger#onError()}
 * might be called from any thread, but on {@link EventLogger#onPlaybackStart()} and
 * might be called from any thread, but on {@link EventLogger#onAudioDataWritten()} and
 * {@link EventLogger#onComplete()} must be called from a single thread
 * (usually the audio playback thread)
 */
@@ -81,10 +82,10 @@ class EventLogger {
    /**
     * Notifies the logger that audio playback has started for some section
     * of the synthesis. This is normally some amount of time after the engine
     * has synthesized data and varides depending on utterances and
     * has synthesized data and varies depending on utterances and
     * other audio currently in the queue.
     */
    public void onPlaybackStart() {
    public void onAudioDataWritten() {
        // For now, keep track of only the first chunk of audio
        // that was played.
        if (mPlaybackStartTime == -1) {
@@ -120,7 +121,7 @@ class EventLogger {
        }

        long completionTime = SystemClock.elapsedRealtime();
        // onPlaybackStart() should normally always be called if an
        // onAudioDataWritten() should normally always be called if an
        // error does not occur.
        if (mError || mPlaybackStartTime == -1 || mEngineCompleteTime == -1) {
            EventLogTags.writeTtsSpeakFailure(mServiceApp, mCallerUid, mCallerPid,
@@ -139,6 +140,7 @@ class EventLogger {
        final long audioLatency = mPlaybackStartTime - mReceivedTime;
        final long engineLatency = mEngineStartTime - mRequestProcessingStartTime;
        final long engineTotal = mEngineCompleteTime - mRequestProcessingStartTime;

        EventLogTags.writeTtsSpeakSuccess(mServiceApp, mCallerUid, mCallerPid,
                getUtteranceLength(), getLocaleString(),
                mRequest.getSpeechRate(), mRequest.getPitch(),
Loading