Merge "Simplify the implementation of AudioPlaybackHandler." (6075e3df) · Commits · e / os / android_frameworks_base

core/java/android/speech/tts/AudioPlaybackHandler.java

+48 −541

File changed.

Preview size limit exceeded, changes collapsed.

core/java/android/speech/tts/AudioMessageParams.java→core/java/android/speech/tts/AudioPlaybackQueueItem.java

+11 −8

Original line number	Diff line number	Diff line
		@@ -16,23 +16,26 @@
		package android.speech.tts;

		import android.speech.tts.TextToSpeechService.UtteranceProgressDispatcher;
		import android.util.Log;

		class AudioMessageParams extends MessageParams {
		class AudioPlaybackQueueItem extends PlaybackQueueItem {
		private final BlockingMediaPlayer mPlayer;

		AudioMessageParams(UtteranceProgressDispatcher dispatcher,
		AudioPlaybackQueueItem(UtteranceProgressDispatcher dispatcher,
		Object callerIdentity, BlockingMediaPlayer player) {
		super(dispatcher, callerIdentity);
		mPlayer = player;
		}

		BlockingMediaPlayer getPlayer() {
		return mPlayer;
		@Override
		public void run() {
		getDispatcher().dispatchOnStart();
		// TODO: This can be avoided. Will be fixed later in this CL.
		mPlayer.startAndWait();
		getDispatcher().dispatchOnDone();
		}

		@Override
		int getType() {
		return TYPE_AUDIO;
		void stop(boolean isError) {
		mPlayer.stop();
		}

		}

core/java/android/speech/tts/BlockingAudioTrack.java

0 → 100644

+338 −0

Original line number	Diff line number	Diff line
		// Copyright 2011 Google Inc. All Rights Reserved.

		package android.speech.tts;

		import android.media.AudioFormat;
		import android.media.AudioTrack;
		import android.util.Log;

		/**
		* Exposes parts of the {@link AudioTrack} API by delegating calls to an
		* underlying {@link AudioTrack}. Additionally, provides methods like
		* {@link #waitAndRelease()} that will block until all audiotrack
		* data has been flushed to the mixer, and is estimated to have completed
		* playback.
		*/
		class BlockingAudioTrack {
		private static final String TAG = "TTS.BlockingAudioTrack";
		private static final boolean DBG = false;


		/**
		* The minimum increment of time to wait for an AudioTrack to finish
		* playing.
		*/
		private static final long MIN_SLEEP_TIME_MS = 20;

		/**
		* The maximum increment of time to sleep while waiting for an AudioTrack
		* to finish playing.
		*/
		private static final long MAX_SLEEP_TIME_MS = 2500;

		/**
		* The maximum amount of time to wait for an audio track to make progress while
		* it remains in PLAYSTATE_PLAYING. This should never happen in normal usage, but
		* could happen in exceptional circumstances like a media_server crash.
		*/
		private static final long MAX_PROGRESS_WAIT_MS = MAX_SLEEP_TIME_MS;

		/**
		* Minimum size of the buffer of the underlying {@link android.media.AudioTrack}
		* we create.
		*/
		private static final int MIN_AUDIO_BUFFER_SIZE = 8192;


		private final int mStreamType;
		private final int mSampleRateInHz;
		private final int mAudioFormat;
		private final int mChannelCount;
		private final float mVolume;
		private final float mPan;

		private final int mBytesPerFrame;
		/**
		* A "short utterance" is one that uses less bytes than the audio
		* track buffer size (mAudioBufferSize). In this case, we need to call
		* {@link AudioTrack#stop()} to send pending buffers to the mixer, and slightly
		* different logic is required to wait for the track to finish.
		*
		* Not volatile, accessed only from the audio playback thread.
		*/
		private boolean mIsShortUtterance;
		/**
		* Will be valid after a call to {@link #init()}.
		*/
		private int mAudioBufferSize;
		private int mBytesWritten = 0;

		private AudioTrack mAudioTrack;
		private volatile boolean mStopped;
		// Locks the initialization / uninitialization of the audio track.
		// This is required because stop() will throw an illegal state exception
		// if called before init() or after mAudioTrack.release().
		private final Object mAudioTrackLock = new Object();

		BlockingAudioTrack(int streamType, int sampleRate,
		int audioFormat, int channelCount,
		float volume, float pan) {
		mStreamType = streamType;
		mSampleRateInHz = sampleRate;
		mAudioFormat = audioFormat;
		mChannelCount = channelCount;
		mVolume = volume;
		mPan = pan;

		mBytesPerFrame = getBytesPerFrame(mAudioFormat) * mChannelCount;
		mIsShortUtterance = false;
		mAudioBufferSize = 0;
		mBytesWritten = 0;

		mAudioTrack = null;
		mStopped = false;
		}

		public void init() {
		AudioTrack track = createStreamingAudioTrack();

		synchronized (mAudioTrackLock) {
		mAudioTrack = track;
		}
		}

		public void stop() {
		synchronized (mAudioTrackLock) {
		if (mAudioTrack != null) {
		mAudioTrack.stop();
		}
		}
		mStopped = true;
		}

		public int write(byte[] data) {
		if (mAudioTrack == null \|\| mStopped) {
		return -1;
		}
		final int bytesWritten = writeToAudioTrack(mAudioTrack, data);
		mBytesWritten += bytesWritten;
		return bytesWritten;
		}

		public void waitAndRelease() {
		// For "small" audio tracks, we have to stop() them to make them mixable,
		// else the audio subsystem will wait indefinitely for us to fill the buffer
		// before rendering the track mixable.
		//
		// If mStopped is true, the track would already have been stopped, so not
		// much point not doing that again.
		if (mBytesWritten < mAudioBufferSize && !mStopped) {
		if (DBG) {
		Log.d(TAG, "Stopping audio track to flush audio, state was : " +
		mAudioTrack.getPlayState() + ",stopped= " + mStopped);
		}

		mIsShortUtterance = true;
		mAudioTrack.stop();
		}

		// Block until the audio track is done only if we haven't stopped yet.
		if (!mStopped) {
		if (DBG) Log.d(TAG, "Waiting for audio track to complete : " + mAudioTrack.hashCode());
		blockUntilDone(mAudioTrack);
		}

		// The last call to AudioTrack.write( ) will return only after
		// all data from the audioTrack has been sent to the mixer, so
		// it's safe to release at this point.
		if (DBG) Log.d(TAG, "Releasing audio track [" + mAudioTrack.hashCode() + "]");
		synchronized (mAudioTrackLock) {
		mAudioTrack.release();
		mAudioTrack = null;
		}
		}


		static int getChannelConfig(int channelCount) {
		if (channelCount == 1) {
		return AudioFormat.CHANNEL_OUT_MONO;
		} else if (channelCount == 2){
		return AudioFormat.CHANNEL_OUT_STEREO;
		}

		return 0;
		}

		long getAudioLengthMs(int numBytes) {
		final int unconsumedFrames = numBytes / mBytesPerFrame;
		final long estimatedTimeMs = unconsumedFrames * 1000 / mSampleRateInHz;

		return estimatedTimeMs;
		}

		private static int writeToAudioTrack(AudioTrack audioTrack, byte[] bytes) {
		if (audioTrack.getPlayState() != AudioTrack.PLAYSTATE_PLAYING) {
		if (DBG) Log.d(TAG, "AudioTrack not playing, restarting : " + audioTrack.hashCode());
		audioTrack.play();
		}

		int count = 0;
		while (count < bytes.length) {
		// Note that we don't take bufferCopy.mOffset into account because
		// it is guaranteed to be 0.
		int written = audioTrack.write(bytes, count, bytes.length);
		if (written <= 0) {
		break;
		}
		count += written;
		}
		return count;
		}

		private AudioTrack createStreamingAudioTrack() {
		final int channelConfig = getChannelConfig(mChannelCount);

		int minBufferSizeInBytes
		= AudioTrack.getMinBufferSize(mSampleRateInHz, channelConfig, mAudioFormat);
		int bufferSizeInBytes = Math.max(MIN_AUDIO_BUFFER_SIZE, minBufferSizeInBytes);

		AudioTrack audioTrack = new AudioTrack(mStreamType, mSampleRateInHz, channelConfig,
		mAudioFormat, bufferSizeInBytes, AudioTrack.MODE_STREAM);
		if (audioTrack.getState() != AudioTrack.STATE_INITIALIZED) {
		Log.w(TAG, "Unable to create audio track.");
		audioTrack.release();
		return null;
		}

		mAudioBufferSize = bufferSizeInBytes;

		setupVolume(audioTrack, mVolume, mPan);
		return audioTrack;
		}

		private static int getBytesPerFrame(int audioFormat) {
		if (audioFormat == AudioFormat.ENCODING_PCM_8BIT) {
		return 1;
		} else if (audioFormat == AudioFormat.ENCODING_PCM_16BIT) {
		return 2;
		}

		return -1;
		}


		private void blockUntilDone(AudioTrack audioTrack) {
		if (mBytesWritten <= 0) {
		return;
		}

		if (mIsShortUtterance) {
		// In this case we would have called AudioTrack#stop() to flush
		// buffers to the mixer. This makes the playback head position
		// unobservable and notification markers do not work reliably. We
		// have no option but to wait until we think the track would finish
		// playing and release it after.
		//
		// This isn't as bad as it looks because (a) We won't end up waiting
		// for much longer than we should because even at 4khz mono, a short
		// utterance weighs in at about 2 seconds, and (b) such short utterances
		// are expected to be relatively infrequent and in a stream of utterances
		// this shows up as a slightly longer pause.
		blockUntilEstimatedCompletion();
		} else {
		blockUntilCompletion(audioTrack);
		}
		}

		private void blockUntilEstimatedCompletion() {
		final int lengthInFrames = mBytesWritten / mBytesPerFrame;
		final long estimatedTimeMs = (lengthInFrames * 1000 / mSampleRateInHz);

		if (DBG) Log.d(TAG, "About to sleep for: " + estimatedTimeMs + "ms for a short utterance");

		try {
		Thread.sleep(estimatedTimeMs);
		} catch (InterruptedException ie) {
		// Do nothing.
		}
		}

		private void blockUntilCompletion(AudioTrack audioTrack) {
		final int lengthInFrames = mBytesWritten / mBytesPerFrame;

		int previousPosition = -1;
		int currentPosition = 0;
		long blockedTimeMs = 0;

		while ((currentPosition = audioTrack.getPlaybackHeadPosition()) < lengthInFrames &&
		audioTrack.getPlayState() == AudioTrack.PLAYSTATE_PLAYING && !mStopped) {

		final long estimatedTimeMs = ((lengthInFrames - currentPosition) * 1000) /
		audioTrack.getSampleRate();
		final long sleepTimeMs = clip(estimatedTimeMs, MIN_SLEEP_TIME_MS, MAX_SLEEP_TIME_MS);

		// Check if the audio track has made progress since the last loop
		// iteration. We should then add in the amount of time that was
		// spent sleeping in the last iteration.
		if (currentPosition == previousPosition) {
		// This works only because the sleep time that would have been calculated
		// would be the same in the previous iteration too.
		blockedTimeMs += sleepTimeMs;
		// If we've taken too long to make progress, bail.
		if (blockedTimeMs > MAX_PROGRESS_WAIT_MS) {
		Log.w(TAG, "Waited unsuccessfully for " + MAX_PROGRESS_WAIT_MS + "ms " +
		"for AudioTrack to make progress, Aborting");
		break;
		}
		} else {
		blockedTimeMs = 0;
		}
		previousPosition = currentPosition;

		if (DBG) {
		Log.d(TAG, "About to sleep for : " + sleepTimeMs + " ms," +
		" Playback position : " + currentPosition + ", Length in frames : "
		+ lengthInFrames);
		}
		try {
		Thread.sleep(sleepTimeMs);
		} catch (InterruptedException ie) {
		break;
		}
		}
		}

		private static void setupVolume(AudioTrack audioTrack, float volume, float pan) {
		final float vol = clip(volume, 0.0f, 1.0f);
		final float panning = clip(pan, -1.0f, 1.0f);

		float volLeft = vol;
		float volRight = vol;
		if (panning > 0.0f) {
		volLeft *= (1.0f - panning);
		} else if (panning < 0.0f) {
		volRight *= (1.0f + panning);
		}
		if (DBG) Log.d(TAG, "volLeft=" + volLeft + ",volRight=" + volRight);
		if (audioTrack.setStereoVolume(volLeft, volRight) != AudioTrack.SUCCESS) {
		Log.e(TAG, "Failed to set volume");
		}
		}

		private static final long clip(long value, long min, long max) {
		if (value < min) {
		return min;
		}

		if (value > max) {
		return max;
		}

		return value;
		}

		private static float clip(float value, float min, float max) {
		return value > max ? max : (value < min ? min : value);
		}

		}

core/java/android/speech/tts/BlockingMediaPlayer.java

+0 −1

Original line number	Diff line number	Diff line
		@@ -54,7 +54,6 @@ class BlockingMediaPlayer {
		mUri = uri;
		mStreamType = streamType;
		mDone = new ConditionVariable();

		}

		/**

core/java/android/speech/tts/EventLogger.java

+6 −4

Original line number	Diff line number	Diff line
		@@ -17,6 +17,7 @@ package android.speech.tts;

		import android.os.SystemClock;
		import android.text.TextUtils;
		import android.util.Log;

		/**
		* Writes data about a given speech synthesis request to the event logs.
		@@ -24,7 +25,7 @@ import android.text.TextUtils;
		* speech rate / pitch and the latency and overall time taken.
		*
		* Note that {@link EventLogger#onStopped()} and {@link EventLogger#onError()}
		* might be called from any thread, but on {@link EventLogger#onPlaybackStart()} and
		* might be called from any thread, but on {@link EventLogger#onAudioDataWritten()} and
		* {@link EventLogger#onComplete()} must be called from a single thread
		* (usually the audio playback thread)
		*/
		@@ -81,10 +82,10 @@ class EventLogger {
		/**
		* Notifies the logger that audio playback has started for some section
		* of the synthesis. This is normally some amount of time after the engine
		* has synthesized data and varides depending on utterances and
		* has synthesized data and varies depending on utterances and
		* other audio currently in the queue.
		*/
		public void onPlaybackStart() {
		public void onAudioDataWritten() {
		// For now, keep track of only the first chunk of audio
		// that was played.
		if (mPlaybackStartTime == -1) {
		@@ -120,7 +121,7 @@ class EventLogger {
		}

		long completionTime = SystemClock.elapsedRealtime();
		// onPlaybackStart() should normally always be called if an
		// onAudioDataWritten() should normally always be called if an
		// error does not occur.
		if (mError \|\| mPlaybackStartTime == -1 \|\| mEngineCompleteTime == -1) {
		EventLogTags.writeTtsSpeakFailure(mServiceApp, mCallerUid, mCallerPid,
		@@ -139,6 +140,7 @@ class EventLogger {
		final long audioLatency = mPlaybackStartTime - mReceivedTime;
		final long engineLatency = mEngineStartTime - mRequestProcessingStartTime;
		final long engineTotal = mEngineCompleteTime - mRequestProcessingStartTime;

		EventLogTags.writeTtsSpeakSuccess(mServiceApp, mCallerUid, mCallerPid,
		getUtteranceLength(), getLocaleString(),
		mRequest.getSpeechRate(), mRequest.getPitch(),