
Commit 71e0b480 authored by Bjorn Bringert

Improve TTS engine audio buffer API

This adds two methods:
SynthesisRequest.getMaxBufferSize()
SynthesisRequest.completeAudioAvailable()

Change-Id: I1186eed45997ee9a7e51212c8d6706dd324ca949
parent 50e657bb
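
For context, a minimal engine-side sketch of the one-shot path this change adds. The sample rate, encoding and channel count below are illustrative values, not part of this commit:

import android.media.AudioFormat;
import android.speech.tts.SynthesisRequest;

class OneShotDeliveryExample {
    // Hands a fully synthesized utterance to the request in a single call,
    // instead of streaming it through start()/audioAvailable()/done().
    // Assumes pcm holds 16-bit mono PCM at 16000 Hz.
    static int deliver(SynthesisRequest request, byte[] pcm) {
        // completeAudioAvailable() is not bounded by getMaxBufferSize(),
        // so the whole buffer can be passed at once.
        return request.completeAudioAvailable(16000, AudioFormat.ENCODING_PCM_16BIT,
                1 /* channelCount */, pcm, 0, pcm.length);
    }
}
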
FileSynthesisRequest.java  +42 −5
@@ -19,6 +19,7 @@ import android.media.AudioFormat;
import android.util.Log;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
@@ -32,6 +33,8 @@ class FileSynthesisRequest extends SynthesisRequest {
    private static final String TAG = "FileSynthesisRequest";
    private static final boolean DBG = false;

    private static final int MAX_AUDIO_BUFFER_SIZE = 8192;

    private static final int WAV_HEADER_LENGTH = 44;
    private static final short WAV_FORMAT_PCM = 0x0001;

@@ -80,6 +83,11 @@ class FileSynthesisRequest extends SynthesisRequest {
        }
    }

    @Override
    public int getMaxBufferSize() {
        return MAX_AUDIO_BUFFER_SIZE;
    }

    @Override
    public int start(int sampleRateInHz, int audioFormat, int channelCount) {
        if (DBG) {
@@ -152,8 +160,9 @@ class FileSynthesisRequest extends SynthesisRequest {
            try {
                // Write WAV header at start of file
                mFile.seek(0);
                int fileLen = (int) mFile.length();
                mFile.write(makeWavHeader(mSampleRateInHz, mAudioFormat, mChannelCount, fileLen));
                int dataLength = (int) (mFile.length() - WAV_HEADER_LENGTH);
                mFile.write(
                        makeWavHeader(mSampleRateInHz, mAudioFormat, mChannelCount, dataLength));
                closeFile();
                return TextToSpeech.SUCCESS;
            } catch (IOException ex) {
@@ -164,8 +173,37 @@ class FileSynthesisRequest extends SynthesisRequest {
        }
    }

    @Override
    public int completeAudioAvailable(int sampleRateInHz, int audioFormat, int channelCount,
            byte[] buffer, int offset, int length) {
        synchronized (mStateLock) {
            if (mStopped) {
                if (DBG) Log.d(TAG, "Request has been aborted.");
                return TextToSpeech.ERROR;
            }
        }
        FileOutputStream out = null;
        try {
            out = new FileOutputStream(mFileName);
            out.write(makeWavHeader(sampleRateInHz, audioFormat, channelCount, length));
            out.write(buffer, offset, length);
            return TextToSpeech.SUCCESS;
        } catch (IOException ex) {
            Log.e(TAG, "Failed to write to " + mFileName + ": " + ex);
            return TextToSpeech.ERROR;
        } finally {
            try {
                if (out != null) {
                    out.close();
                }
            } catch (IOException ex) {
                Log.e(TAG, "Failed to close " + mFileName + ": " + ex);
            }
        }
    }

    private byte[] makeWavHeader(int sampleRateInHz, int audioFormat, int channelCount,
            int fileLength) {
            int dataLength) {
        // TODO: is AudioFormat.ENCODING_DEFAULT always the same as ENCODING_PCM_16BIT?
        int sampleSizeInBytes = (audioFormat == AudioFormat.ENCODING_PCM_8BIT ? 1 : 2);
        int byteRate = sampleRateInHz * sampleSizeInBytes * channelCount;
@@ -177,7 +215,7 @@ class FileSynthesisRequest extends SynthesisRequest {
        header.order(ByteOrder.LITTLE_ENDIAN);

        header.put(new byte[]{ 'R', 'I', 'F', 'F' });
        header.putInt(fileLength - 8);  // RIFF chunk size
        header.putInt(dataLength + WAV_HEADER_LENGTH - 8);  // RIFF chunk size
        header.put(new byte[]{ 'W', 'A', 'V', 'E' });
        header.put(new byte[]{ 'f', 'm', 't', ' ' });
        header.putInt(16);  // size of fmt chunk
@@ -188,7 +226,6 @@ class FileSynthesisRequest extends SynthesisRequest {
        header.putShort(blockAlign);
        header.putShort(bitsPerSample);
        header.put(new byte[]{ 'd', 'a', 't', 'a' });
        int dataLength = fileLength - WAV_HEADER_LENGTH;
        header.putInt(dataLength);

        return headerBuf;
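
For reference, makeWavHeader() now takes the PCM payload length (dataLength) directly, and the RIFF chunk size it writes is dataLength + WAV_HEADER_LENGTH - 8, i.e. the total file size minus the 8-byte RIFF preamble. A short worked example with illustrative values, not taken from this commit:

class WavHeaderSizesExample {
    static void example() {
        // One second of 16-bit mono PCM at 22050 Hz.
        int sampleRateInHz = 22050;
        int bytesPerSample = 2;   // AudioFormat.ENCODING_PCM_16BIT
        int channelCount = 1;
        int dataLength = sampleRateInHz * bytesPerSample * channelCount;  // 44100: 'data' chunk size field
        int riffChunkSize = dataLength + 44 - 8;  // 44136: RIFF chunk size field (WAV_HEADER_LENGTH = 44)
        int fileLength = dataLength + 44;         // 44144: total bytes in the finished file
    }
}
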
PlaybackSynthesisRequest.java  +86 −25
@@ -78,6 +78,13 @@ class PlaybackSynthesisRequest extends SynthesisRequest {
        }
    }

    @Override
    public int getMaxBufferSize() {
        // The AudioTrack buffer will be at least MIN_AUDIO_BUFFER_SIZE, so that should always be
        // a safe buffer size to pass in.
        return MIN_AUDIO_BUFFER_SIZE;
    }

    // TODO: add a thread that writes to the AudioTrack?
    @Override
    public int start(int sampleRateInHz, int audioFormat, int channelCount) {
@@ -86,20 +93,6 @@ class PlaybackSynthesisRequest extends SynthesisRequest {
                    + "," + channelCount + ")");
        }

        int channelConfig;
        if (channelCount == 1) {
            channelConfig = AudioFormat.CHANNEL_OUT_MONO;
        } else if (channelCount == 2){
            channelConfig = AudioFormat.CHANNEL_OUT_STEREO;
        } else {
            Log.e(TAG, "Unsupported number of channels: " + channelCount);
            return TextToSpeech.ERROR;
        }

        int minBufferSizeInBytes
                = AudioTrack.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat);
        int bufferSizeInBytes = Math.max(MIN_AUDIO_BUFFER_SIZE, minBufferSizeInBytes);

        synchronized (mStateLock) {
            if (mStopped) {
                if (DBG) Log.d(TAG, "Request has been aborted.");
@@ -111,22 +104,19 @@ class PlaybackSynthesisRequest extends SynthesisRequest {
                return TextToSpeech.ERROR;
            }

            mAudioTrack = new AudioTrack(mStreamType, sampleRateInHz, channelConfig, audioFormat,
                    bufferSizeInBytes, AudioTrack.MODE_STREAM);
            if (mAudioTrack.getState() != AudioTrack.STATE_INITIALIZED) {
                cleanUp();
            mAudioTrack = createAudioTrack(sampleRateInHz, audioFormat, channelCount,
                    AudioTrack.MODE_STREAM);
            if (mAudioTrack == null) {
                return TextToSpeech.ERROR;
            }

            setupVolume();
        }

        return TextToSpeech.SUCCESS;
    }

    private void setupVolume() {
        float vol = clip(mVolume, 0.0f, 1.0f);
        float panning = clip(mPan, -1.0f, 1.0f);
    private void setupVolume(AudioTrack audioTrack, float volume, float pan) {
        float vol = clip(volume, 0.0f, 1.0f);
        float panning = clip(pan, -1.0f, 1.0f);
        float volLeft = vol;
        float volRight = vol;
        if (panning > 0.0f) {
@@ -135,7 +125,7 @@ class PlaybackSynthesisRequest extends SynthesisRequest {
            volRight *= (1.0f + panning);
        }
        if (DBG) Log.d(TAG, "volLeft=" + volLeft + ",volRight=" + volRight);
        if (mAudioTrack.setStereoVolume(volLeft, volRight) != AudioTrack.SUCCESS) {
        if (audioTrack.setStereoVolume(volLeft, volRight) != AudioTrack.SUCCESS) {
            Log.e(TAG, "Failed to set volume");
        }
    }
@@ -148,7 +138,10 @@ class PlaybackSynthesisRequest extends SynthesisRequest {
    public int audioAvailable(byte[] buffer, int offset, int length) {
        if (DBG) {
            Log.d(TAG, "audioAvailable(byte[" + buffer.length + "],"
                    + offset + "," + length + "), thread ID=" + android.os.Process.myTid());
                    + offset + "," + length + ")");
        }
        if (length > getMaxBufferSize()) {
            throw new IllegalArgumentException("buffer is too large (" + length + " bytes)");
        }
        synchronized (mStateLock) {
            if (mStopped) {
@@ -194,4 +187,72 @@ class PlaybackSynthesisRequest extends SynthesisRequest {
        }
        return TextToSpeech.SUCCESS;
    }

    @Override
    public int completeAudioAvailable(int sampleRateInHz, int audioFormat, int channelCount,
            byte[] buffer, int offset, int length) {
        if (DBG) {
            Log.d(TAG, "completeAudioAvailable(" + sampleRateInHz + "," + audioFormat
                    + "," + channelCount + "byte[" + buffer.length + "],"
                    + offset + "," + length + ")");
        }

        synchronized (mStateLock) {
            if (mStopped) {
                if (DBG) Log.d(TAG, "Request has been aborted.");
                return TextToSpeech.ERROR;
            }
            if (mAudioTrack != null) {
                Log.e(TAG, "start() called before completeAudioAvailable()");
                cleanUp();
                return TextToSpeech.ERROR;
            }

            mAudioTrack = createAudioTrack(sampleRateInHz, audioFormat, channelCount,
                    AudioTrack.MODE_STATIC);
            if (mAudioTrack == null) {
                return TextToSpeech.ERROR;
            }

            try {
                mAudioTrack.write(buffer, offset, length);
                mAudioTrack.play();
            } catch (IllegalStateException ex) {
                Log.e(TAG, "Playback error", ex);
                return TextToSpeech.ERROR;
            } finally {
                cleanUp();
            }
        }

        return TextToSpeech.SUCCESS;
    }

    private AudioTrack createAudioTrack(int sampleRateInHz, int audioFormat, int channelCount,
            int mode) {
        int channelConfig;
        if (channelCount == 1) {
            channelConfig = AudioFormat.CHANNEL_OUT_MONO;
        } else if (channelCount == 2){
            channelConfig = AudioFormat.CHANNEL_OUT_STEREO;
        } else {
            Log.e(TAG, "Unsupported number of channels: " + channelCount);
            return null;
        }

        int minBufferSizeInBytes
                = AudioTrack.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat);
        int bufferSizeInBytes = Math.max(MIN_AUDIO_BUFFER_SIZE, minBufferSizeInBytes);
        AudioTrack audioTrack = new AudioTrack(mStreamType, sampleRateInHz, channelConfig,
                audioFormat, bufferSizeInBytes, mode);
        if (audioTrack == null) {
            return null;
        }
        if (audioTrack.getState() != AudioTrack.STATE_INITIALIZED) {
            audioTrack.release();
            return null;
        }
        setupVolume(audioTrack, mVolume, mPan);
        return audioTrack;
    }
}
\ No newline at end of file
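
A note on the design choice in createAudioTrack(): completeAudioAvailable() requests AudioTrack.MODE_STATIC, where the entire buffer is written before play(), while the start()/audioAvailable() path keeps AudioTrack.MODE_STREAM, where write() feeds data during playback. A minimal sketch of the static usage, with illustrative stream type and audio parameters:

import android.media.AudioFormat;
import android.media.AudioManager;
import android.media.AudioTrack;

class StaticTrackExample {
    // Plays a complete 16-bit mono PCM buffer through a MODE_STATIC AudioTrack.
    static AudioTrack playAll(byte[] pcm, int sampleRateInHz) {
        AudioTrack track = new AudioTrack(AudioManager.STREAM_MUSIC, sampleRateInHz,
                AudioFormat.CHANNEL_OUT_MONO, AudioFormat.ENCODING_PCM_16BIT,
                pcm.length, AudioTrack.MODE_STATIC);  // buffer size equals the total audio size
        track.write(pcm, 0, pcm.length);  // load the whole buffer first
        track.play();                     // then start playback
        return track;                     // the caller should release() the track when done
    }
}
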
SynthesisRequest.java  +32 −3
@@ -18,6 +18,13 @@ package android.speech.tts;
/**
 * A request for speech synthesis given to a TTS engine for processing.
 *
 * The engine can provide streaming audio by calling
 * {@link #start}, then {@link #audioAvailable} until all audio has been provided, then finally
 * {@link #done}.
 *
 * Alternatively, the engine can provide all the audio at once, by using
 * {@link #completeAudioAvailable}.
 *
 * @hide Pending approval
 */
public abstract class SynthesisRequest {
@@ -100,6 +107,12 @@ public abstract class SynthesisRequest {
        return mPitch;
    }

    /**
     * Gets the maximum number of bytes that the TTS engine can pass in a single call of
     * {@link #audioAvailable}. This does not apply to {@link #completeAudioAvailable}.
     */
    public abstract int getMaxBufferSize();

    /**
     * Aborts the speech request.
     *
@@ -117,7 +130,7 @@ public abstract class SynthesisRequest {
     * @param sampleRateInHz Sample rate in HZ of the generated audio.
     * @param audioFormat Audio format of the generated audio. Must be one of
     *         the ENCODING_ constants defined in {@link android.media.AudioFormat}.
     * @param channelCount The number of channels
     * @param channelCount The number of channels. Must be {@code 1} or {@code 2}.
     * @return {@link TextToSpeech#SUCCESS} or {@link TextToSpeech#ERROR}.
     */
    public abstract int start(int sampleRateInHz, int audioFormat, int channelCount);
@@ -131,8 +144,8 @@ public abstract class SynthesisRequest {
     * @param buffer The generated audio data. This method will not hold on to {@code buffer},
     *         so the caller is free to modify it after this method returns.
     * @param offset The offset into {@code buffer} where the audio data starts.
     * @param length The number of bytes of audio data in {@code buffer}.
     *         Must be less than or equal to {@code buffer.length - offset}.
     * @param length The number of bytes of audio data in {@code buffer}. This must be
     *         less than or equal to the return value of {@link #getMaxBufferSize}.
     * @return {@link TextToSpeech#SUCCESS} or {@link TextToSpeech#ERROR}.
     */
    public abstract int audioAvailable(byte[] buffer, int offset, int length);
@@ -148,4 +161,20 @@ public abstract class SynthesisRequest {
     */
    public abstract int done();

    /**
     * The service can call this method instead of using {@link #start}, {@link #audioAvailable}
     * and {@link #done} if all the audio data is available in a single buffer.
     *
     * @param sampleRateInHz Sample rate in HZ of the generated audio.
     * @param audioFormat Audio format of the generated audio. Must be one of
     *         the ENCODING_ constants defined in {@link android.media.AudioFormat}.
     * @param channelCount The number of channels. Must be {@code 1} or {@code 2}.
     * @param buffer The generated audio data. This method will not hold on to {@code buffer},
     *         so the caller is free to modify it after this method returns.
     * @param offset The offset into {@code buffer} where the audio data starts.
     * @param length The number of bytes of audio data in {@code buffer}.
     * @return {@link TextToSpeech#SUCCESS} or {@link TextToSpeech#ERROR}.
     */
    public abstract int completeAudioAvailable(int sampleRateInHz, int audioFormat,
            int channelCount, byte[] buffer, int offset, int length);
}
\ No newline at end of file
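
For completeness, a minimal engine-side sketch of the streaming path described in the class documentation above, chunking the audio by getMaxBufferSize() as the updated audioAvailable() contract requires. The audio parameters are illustrative, not part of this commit:

import android.media.AudioFormat;
import android.speech.tts.SynthesisRequest;
import android.speech.tts.TextToSpeech;

class StreamingDeliveryExample {
    // Streams a 16-bit mono PCM buffer at 16000 Hz to the request in chunks.
    static int deliver(SynthesisRequest request, byte[] pcm) {
        if (request.start(16000, AudioFormat.ENCODING_PCM_16BIT, 1) != TextToSpeech.SUCCESS) {
            return TextToSpeech.ERROR;
        }
        int maxChunk = request.getMaxBufferSize();  // audioAvailable() must not be given more than this
        for (int offset = 0; offset < pcm.length; offset += maxChunk) {
            int length = Math.min(maxChunk, pcm.length - offset);
            if (request.audioAvailable(pcm, offset, length) != TextToSpeech.SUCCESS) {
                return TextToSpeech.ERROR;
            }
        }
        return request.done();
    }
}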