Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 3ad604b3 authored by Bjorn Bringert's avatar Bjorn Bringert Committed by Android (Google) Code Review
Browse files

Merge "Improve TTS engine audio buffer API"

parents 702acacf 71e0b480
Loading
Loading
Loading
Loading
+42 −5
Original line number Diff line number Diff line
@@ -19,6 +19,7 @@ import android.media.AudioFormat;
import android.util.Log;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
@@ -32,6 +33,8 @@ class FileSynthesisRequest extends SynthesisRequest {
    private static final String TAG = "FileSynthesisRequest";
    private static final boolean DBG = false;

    private static final int MAX_AUDIO_BUFFER_SIZE = 8192;

    private static final int WAV_HEADER_LENGTH = 44;
    private static final short WAV_FORMAT_PCM = 0x0001;

@@ -80,6 +83,11 @@ class FileSynthesisRequest extends SynthesisRequest {
        }
    }

    /**
     * Returns the largest audio chunk (in bytes) an engine may pass to a single
     * {@code audioAvailable()} call for this request; fixed at
     * {@code MAX_AUDIO_BUFFER_SIZE} (8192) for file-backed synthesis.
     */
    @Override
    public int getMaxBufferSize() {
        return MAX_AUDIO_BUFFER_SIZE;
    }

    @Override
    public int start(int sampleRateInHz, int audioFormat, int channelCount) {
        if (DBG) {
@@ -152,8 +160,9 @@ class FileSynthesisRequest extends SynthesisRequest {
            try {
                // Write WAV header at start of file
                mFile.seek(0);
                int fileLen = (int) mFile.length();
                mFile.write(makeWavHeader(mSampleRateInHz, mAudioFormat, mChannelCount, fileLen));
                int dataLength = (int) (mFile.length() - WAV_HEADER_LENGTH);
                mFile.write(
                        makeWavHeader(mSampleRateInHz, mAudioFormat, mChannelCount, dataLength));
                closeFile();
                return TextToSpeech.SUCCESS;
            } catch (IOException ex) {
@@ -164,8 +173,37 @@ class FileSynthesisRequest extends SynthesisRequest {
        }
    }

    /**
     * Writes a complete utterance to {@code mFileName} in one shot: a WAV header
     * sized for exactly {@code length} bytes of audio, followed by the audio data.
     *
     * @return {@code TextToSpeech.SUCCESS} on a successful write,
     *         {@code TextToSpeech.ERROR} if the request was stopped or the write failed.
     */
    @Override
    public int completeAudioAvailable(int sampleRateInHz, int audioFormat, int channelCount,
            byte[] buffer, int offset, int length) {
        // Bail out before touching the file if the request was already aborted.
        synchronized (mStateLock) {
            if (mStopped) {
                if (DBG) Log.d(TAG, "Request has been aborted.");
                return TextToSpeech.ERROR;
            }
        }
        FileOutputStream stream = null;
        int result = TextToSpeech.ERROR;
        try {
            stream = new FileOutputStream(mFileName);
            stream.write(makeWavHeader(sampleRateInHz, audioFormat, channelCount, length));
            stream.write(buffer, offset, length);
            result = TextToSpeech.SUCCESS;
        } catch (IOException ex) {
            Log.e(TAG, "Failed to write to " + mFileName + ": " + ex);
        } finally {
            // Close unconditionally; a close failure is logged but does not
            // change the result of an otherwise successful write.
            if (stream != null) {
                try {
                    stream.close();
                } catch (IOException ex) {
                    Log.e(TAG, "Failed to close " + mFileName + ": " + ex);
                }
            }
        }
        return result;
    }

    private byte[] makeWavHeader(int sampleRateInHz, int audioFormat, int channelCount,
            int fileLength) {
            int dataLength) {
        // TODO: is AudioFormat.ENCODING_DEFAULT always the same as ENCODING_PCM_16BIT?
        int sampleSizeInBytes = (audioFormat == AudioFormat.ENCODING_PCM_8BIT ? 1 : 2);
        int byteRate = sampleRateInHz * sampleSizeInBytes * channelCount;
@@ -177,7 +215,7 @@ class FileSynthesisRequest extends SynthesisRequest {
        header.order(ByteOrder.LITTLE_ENDIAN);

        header.put(new byte[]{ 'R', 'I', 'F', 'F' });
        header.putInt(fileLength - 8);  // RIFF chunk size
        header.putInt(dataLength + WAV_HEADER_LENGTH - 8);  // RIFF chunk size
        header.put(new byte[]{ 'W', 'A', 'V', 'E' });
        header.put(new byte[]{ 'f', 'm', 't', ' ' });
        header.putInt(16);  // size of fmt chunk
@@ -188,7 +226,6 @@ class FileSynthesisRequest extends SynthesisRequest {
        header.putShort(blockAlign);
        header.putShort(bitsPerSample);
        header.put(new byte[]{ 'd', 'a', 't', 'a' });
        int dataLength = fileLength - WAV_HEADER_LENGTH;
        header.putInt(dataLength);

        return headerBuf;
+86 −25
Original line number Diff line number Diff line
@@ -78,6 +78,13 @@ class PlaybackSynthesisRequest extends SynthesisRequest {
        }
    }

    /**
     * Returns the largest audio chunk (in bytes) an engine may pass to a single
     * {@code audioAvailable()} call for this request.
     */
    @Override
    public int getMaxBufferSize() {
        // The AudioTrack buffer will be at least MIN_AUDIO_BUFFER_SIZE, so that should always be
        // a safe buffer size to pass in.
        return MIN_AUDIO_BUFFER_SIZE;
    }

    // TODO: add a thread that writes to the AudioTrack?
    @Override
    public int start(int sampleRateInHz, int audioFormat, int channelCount) {
@@ -86,20 +93,6 @@ class PlaybackSynthesisRequest extends SynthesisRequest {
                    + "," + channelCount + ")");
        }

        int channelConfig;
        if (channelCount == 1) {
            channelConfig = AudioFormat.CHANNEL_OUT_MONO;
        } else if (channelCount == 2){
            channelConfig = AudioFormat.CHANNEL_OUT_STEREO;
        } else {
            Log.e(TAG, "Unsupported number of channels: " + channelCount);
            return TextToSpeech.ERROR;
        }

        int minBufferSizeInBytes
                = AudioTrack.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat);
        int bufferSizeInBytes = Math.max(MIN_AUDIO_BUFFER_SIZE, minBufferSizeInBytes);

        synchronized (mStateLock) {
            if (mStopped) {
                if (DBG) Log.d(TAG, "Request has been aborted.");
@@ -111,22 +104,19 @@ class PlaybackSynthesisRequest extends SynthesisRequest {
                return TextToSpeech.ERROR;
            }

            mAudioTrack = new AudioTrack(mStreamType, sampleRateInHz, channelConfig, audioFormat,
                    bufferSizeInBytes, AudioTrack.MODE_STREAM);
            if (mAudioTrack.getState() != AudioTrack.STATE_INITIALIZED) {
                cleanUp();
            mAudioTrack = createAudioTrack(sampleRateInHz, audioFormat, channelCount,
                    AudioTrack.MODE_STREAM);
            if (mAudioTrack == null) {
                return TextToSpeech.ERROR;
            }

            setupVolume();
        }

        return TextToSpeech.SUCCESS;
    }

    private void setupVolume() {
        float vol = clip(mVolume, 0.0f, 1.0f);
        float panning = clip(mPan, -1.0f, 1.0f);
    private void setupVolume(AudioTrack audioTrack, float volume, float pan) {
        float vol = clip(volume, 0.0f, 1.0f);
        float panning = clip(pan, -1.0f, 1.0f);
        float volLeft = vol;
        float volRight = vol;
        if (panning > 0.0f) {
@@ -135,7 +125,7 @@ class PlaybackSynthesisRequest extends SynthesisRequest {
            volRight *= (1.0f + panning);
        }
        if (DBG) Log.d(TAG, "volLeft=" + volLeft + ",volRight=" + volRight);
        if (mAudioTrack.setStereoVolume(volLeft, volRight) != AudioTrack.SUCCESS) {
        if (audioTrack.setStereoVolume(volLeft, volRight) != AudioTrack.SUCCESS) {
            Log.e(TAG, "Failed to set volume");
        }
    }
@@ -148,7 +138,10 @@ class PlaybackSynthesisRequest extends SynthesisRequest {
    public int audioAvailable(byte[] buffer, int offset, int length) {
        if (DBG) {
            Log.d(TAG, "audioAvailable(byte[" + buffer.length + "],"
                    + offset + "," + length + "), thread ID=" + android.os.Process.myTid());
                    + offset + "," + length + ")");
        }
        if (length > getMaxBufferSize()) {
            throw new IllegalArgumentException("buffer is too large (" + length + " bytes)");
        }
        synchronized (mStateLock) {
            if (mStopped) {
@@ -194,4 +187,72 @@ class PlaybackSynthesisRequest extends SynthesisRequest {
        }
        return TextToSpeech.SUCCESS;
    }

    /**
     * Plays a complete utterance supplied in a single buffer, instead of the
     * streaming {@code start()} / {@code audioAvailable()} / {@code done()} sequence.
     * Creates a MODE_STATIC AudioTrack, writes the whole buffer, and starts playback.
     *
     * @return {@code TextToSpeech.SUCCESS} if playback was started,
     *         {@code TextToSpeech.ERROR} if the request was stopped, streaming had
     *         already begun, the track could not be created, or playback failed.
     */
    @Override
    public int completeAudioAvailable(int sampleRateInHz, int audioFormat, int channelCount,
            byte[] buffer, int offset, int length) {
        if (DBG) {
            // Fixed: original log string was missing the separator between
            // channelCount and the buffer dump ("..." + channelCount + "byte[...").
            Log.d(TAG, "completeAudioAvailable(" + sampleRateInHz + "," + audioFormat
                    + "," + channelCount + ",byte[" + buffer.length + "],"
                    + offset + "," + length + ")");
        }

        synchronized (mStateLock) {
            if (mStopped) {
                if (DBG) Log.d(TAG, "Request has been aborted.");
                return TextToSpeech.ERROR;
            }
            // A non-null track means start() already began streaming; the two
            // delivery modes are mutually exclusive for one request.
            if (mAudioTrack != null) {
                Log.e(TAG, "start() called before completeAudioAvailable()");
                cleanUp();
                return TextToSpeech.ERROR;
            }

            mAudioTrack = createAudioTrack(sampleRateInHz, audioFormat, channelCount,
                    AudioTrack.MODE_STATIC);
            if (mAudioTrack == null) {
                return TextToSpeech.ERROR;
            }

            try {
                mAudioTrack.write(buffer, offset, length);
                mAudioTrack.play();
            } catch (IllegalStateException ex) {
                Log.e(TAG, "Playback error", ex);
                return TextToSpeech.ERROR;
            } finally {
                // NOTE(review): cleanUp() runs immediately after play() even on
                // success — confirm releasing a MODE_STATIC track here does not
                // cut playback short.
                cleanUp();
            }
        }

        return TextToSpeech.SUCCESS;
    }

    /**
     * Creates and initializes an AudioTrack for this request's stream type,
     * applying the request's volume and pan.
     *
     * @param sampleRateInHz Sample rate in Hz of the audio to be played.
     * @param audioFormat One of the ENCODING_ constants in {@link AudioFormat}.
     * @param channelCount Number of channels; must be 1 (mono) or 2 (stereo).
     * @param mode {@link AudioTrack#MODE_STREAM} or {@link AudioTrack#MODE_STATIC}.
     * @return an initialized AudioTrack, or {@code null} if the channel count is
     *         unsupported or the track failed to initialize.
     */
    private AudioTrack createAudioTrack(int sampleRateInHz, int audioFormat, int channelCount,
            int mode) {
        int channelConfig;
        if (channelCount == 1) {
            channelConfig = AudioFormat.CHANNEL_OUT_MONO;
        } else if (channelCount == 2) {
            channelConfig = AudioFormat.CHANNEL_OUT_STEREO;
        } else {
            Log.e(TAG, "Unsupported number of channels: " + channelCount);
            return null;
        }

        // Never use less than MIN_AUDIO_BUFFER_SIZE, matching getMaxBufferSize().
        int minBufferSizeInBytes
                = AudioTrack.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat);
        int bufferSizeInBytes = Math.max(MIN_AUDIO_BUFFER_SIZE, minBufferSizeInBytes);
        AudioTrack audioTrack = new AudioTrack(mStreamType, sampleRateInHz, channelConfig,
                audioFormat, bufferSizeInBytes, mode);
        // Removed dead code: `new` never returns null in Java, so the former
        // `if (audioTrack == null)` check could not fire. Initialization failure
        // is reported through getState() instead.
        if (audioTrack.getState() != AudioTrack.STATE_INITIALIZED) {
            audioTrack.release();
            return null;
        }
        setupVolume(audioTrack, mVolume, mPan);
        return audioTrack;
    }
}
 No newline at end of file
+32 −3
Original line number Diff line number Diff line
@@ -18,6 +18,13 @@ package android.speech.tts;
/**
 * A request for speech synthesis given to a TTS engine for processing.
 *
 * The engine can provide streaming audio by calling
 * {@link #start}, then {@link #audioAvailable} until all audio has been provided, then finally
 * {@link #done}.
 *
 * Alternatively, the engine can provide all the audio at once, by using
 * {@link #completeAudioAvailable}.
 *
 * @hide Pending approval
 */
public abstract class SynthesisRequest {
@@ -100,6 +107,12 @@ public abstract class SynthesisRequest {
        return mPitch;
    }

    /**
     * Gets the maximum number of bytes that the TTS engine can pass in a single call of
     * {@link #audioAvailable}. This does not apply to {@link #completeAudioAvailable}.
     */
    public abstract int getMaxBufferSize();

    /**
     * Aborts the speech request.
     *
@@ -117,7 +130,7 @@ public abstract class SynthesisRequest {
     * @param sampleRateInHz Sample rate in HZ of the generated audio.
     * @param audioFormat Audio format of the generated audio. Must be one of
     *         the ENCODING_ constants defined in {@link android.media.AudioFormat}.
     * @param channelCount The number of channels
     * @param channelCount The number of channels. Must be {@code 1} or {@code 2}.
     * @return {@link TextToSpeech#SUCCESS} or {@link TextToSpeech#ERROR}.
     */
    public abstract int start(int sampleRateInHz, int audioFormat, int channelCount);
@@ -131,8 +144,8 @@ public abstract class SynthesisRequest {
     * @param buffer The generated audio data. This method will not hold on to {@code buffer},
     *         so the caller is free to modify it after this method returns.
     * @param offset The offset into {@code buffer} where the audio data starts.
     * @param length The number of bytes of audio data in {@code buffer}.
     *         Must be less than or equal to {@code buffer.length - offset}.
     * @param length The number of bytes of audio data in {@code buffer}. This must be
     *         less than or equal to the return value of {@link #getMaxBufferSize}.
     * @return {@link TextToSpeech#SUCCESS} or {@link TextToSpeech#ERROR}.
     */
    public abstract int audioAvailable(byte[] buffer, int offset, int length);
@@ -148,4 +161,20 @@ public abstract class SynthesisRequest {
     */
    public abstract int done();

    /**
     * The service can call this method instead of using {@link #start}, {@link #audioAvailable}
     * and {@link #done} if all the audio data is available in a single buffer.
     *
     * @param sampleRateInHz Sample rate in HZ of the generated audio.
     * @param audioFormat Audio format of the generated audio. Must be one of
     *         the ENCODING_ constants defined in {@link android.media.AudioFormat}.
     * @param channelCount The number of channels. Must be {@code 1} or {@code 2}.
     * @param buffer The generated audio data. This method will not hold on to {@code buffer},
     *         so the caller is free to modify it after this method returns.
     * @param offset The offset into {@code buffer} where the audio data starts.
     * @param length The number of bytes of audio data in {@code buffer}.
     * @return {@link TextToSpeech#SUCCESS} or {@link TextToSpeech#ERROR}.
     */
    public abstract int completeAudioAvailable(int sampleRateInHz, int audioFormat,
            int channelCount, byte[] buffer, int offset, int length);
}
 No newline at end of file