Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 90d15d23 authored by Przemyslaw Szczepaniak's avatar Przemyslaw Szczepaniak
Browse files

Add new TTS API.

Much of the existing API is reused (mostly on the service side). The new API
provides better support for network-based synthesis, defines explicit
synthesis fallback support, has better error handling, allows multiple
different voices per locale, supports feature enumeration and custom
extensions at the voice level, and much more.

If a service does not provide a V2 implementation, a basic adapter
from V2 to existing V1 implementation is provided.

Bug: 8259486
Change-Id: I797b7fc054f11c9b0d930f4dea582e57596ea127
parent b9ca1a9a
Loading
Loading
Loading
Loading
+164 −1
Original line number Diff line number Diff line
@@ -23403,11 +23403,52 @@ package android.speech {
package android.speech.tts {
  public final class RequestConfig {
    method public android.os.Bundle getAudioParams();
    method public android.speech.tts.VoiceInfo getVoice();
    method public android.os.Bundle getVoiceParams();
  }
  public static final class RequestConfig.Builder {
    method public android.speech.tts.RequestConfig build();
    method public static android.speech.tts.RequestConfig.Builder newBuilder();
    method public static android.speech.tts.RequestConfig.Builder newBuilder(android.speech.tts.RequestConfig);
    method public android.speech.tts.RequestConfig.Builder setAudioParam(java.lang.String, java.lang.Object);
    method public void setAudioParamPan(float);
    method public void setAudioParamStream(int);
    method public void setAudioParamVolume(float);
    method public android.speech.tts.RequestConfig.Builder setVoice(android.speech.tts.VoiceInfo);
    method public android.speech.tts.RequestConfig.Builder setVoiceParam(java.lang.String, java.lang.Object);
  }
  public final class RequestConfigHelper {
    method public static android.speech.tts.RequestConfig highestQuality(android.speech.tts.TextToSpeechClient.EngineStatus, boolean, android.speech.tts.RequestConfigHelper.VoiceScorer);
    method public static android.speech.tts.RequestConfig highestQuality(android.speech.tts.TextToSpeechClient.EngineStatus, boolean);
  }
  public static final class RequestConfigHelper.ExactLocaleMatcher implements android.speech.tts.RequestConfigHelper.VoiceScorer {
    ctor public RequestConfigHelper.ExactLocaleMatcher(java.util.Locale);
    method public int scoreVoice(android.speech.tts.VoiceInfo);
  }
  public static final class RequestConfigHelper.LanguageMatcher implements android.speech.tts.RequestConfigHelper.VoiceScorer {
    ctor public RequestConfigHelper.LanguageMatcher(java.util.Locale);
    method public int scoreVoice(android.speech.tts.VoiceInfo);
  }
  public static abstract interface RequestConfigHelper.VoiceScorer {
    method public abstract int scoreVoice(android.speech.tts.VoiceInfo);
  }
  public abstract interface SynthesisCallback {
    method public abstract int audioAvailable(byte[], int, int);
    method public abstract int done();
    method public abstract void error();
    method public abstract void error(int);
    method public abstract int fallback();
    method public abstract int getMaxBufferSize();
    method public abstract boolean hasFinished();
    method public abstract boolean hasStarted();
    method public abstract int start(int, int, int);
  }
@@ -23423,7 +23464,17 @@ package android.speech.tts {
    method public java.lang.String getVariant();
  }
  public class TextToSpeech {
  public final class SynthesisRequestV2 implements android.os.Parcelable {
    method public int describeContents();
    method public android.os.Bundle getAudioParams();
    method public java.lang.String getText();
    method public java.lang.String getUtteranceId();
    method public int getVoiceId();
    method public android.os.Bundle getVoiceParams();
    method public void writeToParcel(android.os.Parcel, int);
  }
  public deprecated class TextToSpeech {
    ctor public TextToSpeech(android.content.Context, android.speech.tts.TextToSpeech.OnInitListener);
    ctor public TextToSpeech(android.content.Context, android.speech.tts.TextToSpeech.OnInitListener, java.lang.String);
    method public int addEarcon(java.lang.String, java.lang.String, int);
@@ -23508,8 +23559,80 @@ package android.speech.tts {
    method public abstract void onUtteranceCompleted(java.lang.String);
  }
  public final class TextToSpeechClient {
    ctor public TextToSpeechClient(android.content.Context, java.lang.String, boolean, android.speech.tts.TextToSpeechClient.RequestCallbacks, android.speech.tts.TextToSpeechClient.ConnectionCallbacks);
    ctor public TextToSpeechClient(android.content.Context, android.speech.tts.TextToSpeechClient.RequestCallbacks, android.speech.tts.TextToSpeechClient.ConnectionCallbacks);
    method public void connect();
    method public void disconnect();
    method public android.speech.tts.TextToSpeechClient.EngineStatus getEngineStatus();
    method public boolean isConnected();
    method public void queueAudio(android.net.Uri, android.speech.tts.TextToSpeechClient.UtteranceId, android.speech.tts.RequestConfig, android.speech.tts.TextToSpeechClient.RequestCallbacks);
    method public void queueSilence(long, android.speech.tts.TextToSpeechClient.UtteranceId, android.speech.tts.TextToSpeechClient.RequestCallbacks);
    method public void queueSpeak(java.lang.String, android.speech.tts.TextToSpeechClient.UtteranceId, android.speech.tts.RequestConfig, android.speech.tts.TextToSpeechClient.RequestCallbacks);
    method public void queueSynthesizeToFile(java.lang.String, android.speech.tts.TextToSpeechClient.UtteranceId, java.io.File, android.speech.tts.RequestConfig, android.speech.tts.TextToSpeechClient.RequestCallbacks);
    method public void stop();
  }
  public static abstract interface TextToSpeechClient.ConnectionCallbacks {
    method public abstract void onConnectionFailure();
    method public abstract void onConnectionSuccess();
    method public abstract void onEngineStatusChange(android.speech.tts.TextToSpeechClient.EngineStatus);
    method public abstract void onServiceDisconnected();
  }
  public static final class TextToSpeechClient.EngineStatus {
    method public java.lang.String getEnginePackage();
    method public java.util.List<android.speech.tts.VoiceInfo> getVoices();
  }
  public static final class TextToSpeechClient.Params {
    field public static final java.lang.String AUDIO_PARAM_PAN = "pan";
    field public static final java.lang.String AUDIO_PARAM_STREAM = "streamType";
    field public static final java.lang.String AUDIO_PARAM_VOLUME = "volume";
    field public static final java.lang.String FALLBACK_VOICE_ID = "fallbackVoiceId";
    field public static final java.lang.String NETWORK_RETRIES_COUNT = "networkRetriesCount";
    field public static final java.lang.String NETWORK_TIMEOUT_MS = "networkTimeoutMs";
    field public static final java.lang.String SPEECH_PITCH = "speechPitch";
    field public static final java.lang.String SPEECH_SPEED = "speechSpeed";
    field public static final java.lang.String TRACK_SUBUTTERANCE_PROGRESS = "trackSubutteranceProgress";
  }
  public static abstract class TextToSpeechClient.RequestCallbacks {
    ctor public TextToSpeechClient.RequestCallbacks();
    method public void onSynthesisFailure(android.speech.tts.TextToSpeechClient.UtteranceId, int);
    method public void onSynthesisFallback(android.speech.tts.TextToSpeechClient.UtteranceId);
    method public void onSynthesisProgress(android.speech.tts.TextToSpeechClient.UtteranceId, int, int);
    method public void onSynthesisStart(android.speech.tts.TextToSpeechClient.UtteranceId);
    method public void onSynthesisStop(android.speech.tts.TextToSpeechClient.UtteranceId);
    method public void onSynthesisSuccess(android.speech.tts.TextToSpeechClient.UtteranceId);
  }
  public static final class TextToSpeechClient.Status {
    field public static final int ERROR_DOWNLOADING_ADDITIONAL_DATA = 17; // 0x11
    field public static final int ERROR_INVALID_REQUEST = 15; // 0xf
    field public static final int ERROR_NETWORK = 13; // 0xd
    field public static final int ERROR_NETWORK_TIMEOUT = 14; // 0xe
    field public static final int ERROR_NON_UNIQUE_UTTERANCE_ID = 16; // 0x10
    field public static final int ERROR_OUTPUT = 12; // 0xc
    field public static final int ERROR_SERVICE = 11; // 0xb
    field public static final int ERROR_SYNTHESIS = 10; // 0xa
    field public static final int ERROR_UNKNOWN = -1; // 0xffffffff
    field public static final int STOPPED = 100; // 0x64
    field public static final int SUCCESS = 0; // 0x0
  }
  public static final class TextToSpeechClient.UtteranceId {
    ctor public TextToSpeechClient.UtteranceId();
    ctor public TextToSpeechClient.UtteranceId(java.lang.String);
    method public java.lang.String toUniqueString();
  }
  public abstract class TextToSpeechService extends android.app.Service {
    ctor public TextToSpeechService();
    method protected java.util.List<android.speech.tts.VoiceInfo> checkVoicesInfo();
    method public void forceVoicesInfoCheck();
    method public android.speech.tts.VoiceInfo getVoicesInfoWithId(int);
    method protected boolean implementsV2API();
    method public android.os.IBinder onBind(android.content.Intent);
    method protected java.util.Set<java.lang.String> onGetFeaturesForLanguage(java.lang.String, java.lang.String, java.lang.String);
    method protected abstract java.lang.String[] onGetLanguage();
@@ -23517,6 +23640,8 @@ package android.speech.tts {
    method protected abstract int onLoadLanguage(java.lang.String, java.lang.String, java.lang.String);
    method protected abstract void onStop();
    method protected abstract void onSynthesizeText(android.speech.tts.SynthesisRequest, android.speech.tts.SynthesisCallback);
    method protected void onSynthesizeTextV2(android.speech.tts.SynthesisRequestV2, android.speech.tts.VoiceInfo, android.speech.tts.SynthesisCallback);
    method protected void onVoicesInfoChange();
  }
  public abstract class UtteranceProgressListener {
@@ -23526,6 +23651,44 @@ package android.speech.tts {
    method public abstract void onStart(java.lang.String);
  }
  public final class VoiceInfo implements android.os.Parcelable {
    method public int describeContents();
    method public android.os.Bundle getAdditionalFeatures();
    method public int getId();
    method public int getLatency();
    method public java.util.Locale getLocale();
    method public android.os.Bundle getParamsWithDefaults();
    method public int getQuality();
    method public boolean getRequiresNetworkConnection();
    method public void writeToParcel(android.os.Parcel, int);
    field public static final java.lang.String FEATURE_MAY_AUTOINSTALL = "mayAutoInstall";
    field public static final java.lang.String FEATURE_SPEAKER_GENDER = "speakerGender";
    field public static final java.lang.String FEATURE_WORDS_PER_MINUTE = "wordsPerMinute";
    field public static final int LATENCY_HIGH = 400; // 0x190
    field public static final int LATENCY_LOW = 200; // 0xc8
    field public static final int LATENCY_NORMAL = 300; // 0x12c
    field public static final int LATENCY_VERY_HIGH = 500; // 0x1f4
    field public static final int LATENCY_VERY_LOW = 100; // 0x64
    field public static final int QUALITY_HIGH = 400; // 0x190
    field public static final int QUALITY_LOW = 200; // 0xc8
    field public static final int QUALITY_NORMAL = 300; // 0x12c
    field public static final int QUALITY_VERY_HIGH = 500; // 0x1f4
    field public static final int QUALITY_VERY_LOW = 100; // 0x64
  }
  public static final class VoiceInfo.Builder {
    ctor public VoiceInfo.Builder();
    ctor public VoiceInfo.Builder(android.speech.tts.VoiceInfo);
    method public android.speech.tts.VoiceInfo build();
    method public android.speech.tts.VoiceInfo.Builder setAdditionalFeatures(android.os.Bundle);
    method public android.speech.tts.VoiceInfo.Builder setId(int);
    method public android.speech.tts.VoiceInfo.Builder setLatency(int);
    method public android.speech.tts.VoiceInfo.Builder setLocale(java.util.Locale);
    method public android.speech.tts.VoiceInfo.Builder setParamsWithDefaults(android.os.Bundle);
    method public android.speech.tts.VoiceInfo.Builder setQuality(int);
    method public android.speech.tts.VoiceInfo.Builder setRequiresNetworkConnection(boolean);
  }
}
package android.telephony {
+124 −0
Original line number Diff line number Diff line
/*
 * Copyright (C) 2011 The Android Open Source Project
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
@@ -16,36 +16,28 @@
package android.speech.tts;

import android.os.SystemClock;
import android.text.TextUtils;
import android.util.Log;

/**
 * Writes data about a given speech synthesis request to the event logs.
 * The data that is logged includes the calling app, length of the utterance,
 * speech rate / pitch and the latency and overall time taken.
 *
 * Note that {@link EventLogger#onStopped()} and {@link EventLogger#onError()}
 * might be called from any thread, but {@link EventLogger#onAudioDataWritten()} and
 * {@link EventLogger#onComplete()} must be called from a single thread
 * (usually the audio playback thread).
 * Base class for storing data about a given speech synthesis request to the
 * event logs. The data that is logged depends on actual implementation. Note
 * that {@link AbstractEventLogger#onAudioDataWritten()} and
 * {@link AbstractEventLogger#onEngineComplete()} must be called from a single
 * thread (usually the audio playback thread).
 */
class EventLogger {
    private final SynthesisRequest mRequest;
    private final String mServiceApp;
    private final int mCallerUid;
    private final int mCallerPid;
    private final long mReceivedTime;
    private long mPlaybackStartTime = -1;
abstract class AbstractEventLogger {
    protected final String mServiceApp;
    protected final int mCallerUid;
    protected final int mCallerPid;
    protected final long mReceivedTime;
    protected long mPlaybackStartTime = -1;

    private volatile long mRequestProcessingStartTime = -1;
    private volatile long mEngineStartTime = -1;
    private volatile long mEngineCompleteTime = -1;

    private volatile boolean mError = false;
    private volatile boolean mStopped = false;
    private boolean mLogWritten = false;

    EventLogger(SynthesisRequest request, int callerUid, int callerPid, String serviceApp) {
        mRequest = request;
    AbstractEventLogger(int callerUid, int callerPid, String serviceApp) {
        mCallerUid = callerUid;
        mCallerPid = callerPid;
        mServiceApp = serviceApp;
@@ -93,27 +85,11 @@ class EventLogger {
        }
    }

    /**
     * Notifies the logger that the current synthesis was stopped.
     * Latency numbers are not reported for stopped syntheses.
     */
    public void onStopped() {
        // The flag starts out false, so it has to be raised to true here for
        // the stopped-synthesis check made when the log is written to apply.
        mStopped = true;
    }

    /**
     * Notifies the logger that the current synthesis resulted in
     * an error. This is logged using {@link EventLogTags#writeTtsSpeakFailure}.
     *
     * This method only raises the error flag; nothing is written to the
     * event log from here.
     */
    public void onError() {
        // Remember the failure so it can be taken into account when the log is written.
        mError = true;
    }

    /**
     * Notifies the logger that the current synthesis has completed.
     * All available data is now logged.
     */
    public void onWriteData() {
    public void onCompleted(int statusCode) {
        if (mLogWritten) {
            return;
        } else {
@@ -121,58 +97,28 @@ class EventLogger {
        }

        long completionTime = SystemClock.elapsedRealtime();
        // onAudioDataWritten() should normally always be called if an
        // error does not occur.
        if (mError || mPlaybackStartTime == -1 || mEngineCompleteTime == -1) {
            EventLogTags.writeTtsSpeakFailure(mServiceApp, mCallerUid, mCallerPid,
                    getUtteranceLength(), getLocaleString(),
                    mRequest.getSpeechRate(), mRequest.getPitch());
            return;
        }

        // We don't report stopped syntheses because their overall
        // total time spent will be innacurate (will not correlate with
        // We don't report latency for stopped syntheses because their overall
        // total time spent will be inaccurate (will not correlate with
        // the length of the utterance).
        if (mStopped) {

        // onAudioDataWritten() should normally always be called, and hence mPlaybackStartTime
        // should be set, if an error does not occur.
        if (statusCode != TextToSpeechClient.Status.SUCCESS
                || mPlaybackStartTime == -1 || mEngineCompleteTime == -1) {
            logFailure(statusCode);
            return;
        }

        final long audioLatency = mPlaybackStartTime - mReceivedTime;
        final long engineLatency = mEngineStartTime - mRequestProcessingStartTime;
        final long engineTotal = mEngineCompleteTime - mRequestProcessingStartTime;

        EventLogTags.writeTtsSpeakSuccess(mServiceApp, mCallerUid, mCallerPid,
                getUtteranceLength(), getLocaleString(),
                mRequest.getSpeechRate(), mRequest.getPitch(),
                engineLatency, engineTotal, audioLatency);
        logSuccess(audioLatency, engineLatency, engineTotal);
    }

    /**
     * Computes the number of characters in the text of the request being logged.
     *
     * @return the text length, or 0 when the request carries no text
     *         ({@code null}).
     */
    private int getUtteranceLength() {
        final String text = mRequest.getText();
        if (text == null) {
            return 0;
        }
        return text.length();
    }
    protected abstract void logFailure(int statusCode);
    protected abstract void logSuccess(long audioLatency, long engineLatency,
            long engineTotal);

    /**
     * Builds a locale string of the form lang-country-variant from the
     * synthesis request. The country is appended only when it is non-empty,
     * and the variant only when both the country and the variant are non-empty.
     *
     * @return the formatted locale string
     */
    private String getLocaleString() {
        final StringBuilder locale = new StringBuilder(mRequest.getLanguage());

        final String country = mRequest.getCountry();
        if (TextUtils.isEmpty(country)) {
            // Without a country the variant is never consulted.
            return locale.toString();
        }
        locale.append('-').append(country);

        final String variant = mRequest.getVariant();
        if (!TextUtils.isEmpty(variant)) {
            locale.append('-').append(variant);
        }

        return locale.toString();
    }

}
+27 −2
Original line number Diff line number Diff line
@@ -15,15 +15,28 @@
 */
package android.speech.tts;


/**
 * Defines additional methods the synthesis callback must implement that
 * are private to the TTS service implementation.
 *
 * All of these class methods (with the exception of {@link #stop()}) can only be called on the
 * synthesis thread, while inside
 * {@link TextToSpeechService#onSynthesizeText} or {@link TextToSpeechService#onSynthesizeTextV2}.
 * {@link #stop()} is the exception; it may be called from multiple threads.
 */
abstract class AbstractSynthesisCallback implements SynthesisCallback {
    /** If true, request comes from V2 TTS interface */
    protected final boolean mClientIsUsingV2;

    /**
     * Checks whether the synthesis request completed successfully.
     * Constructor.
     * @param clientIsUsingV2 If true, this callback will be used inside
     *         {@link TextToSpeechService#onSynthesizeTextV2} method.
     */
    abstract boolean isDone();
    AbstractSynthesisCallback(boolean clientIsUsingV2) {
        mClientIsUsingV2 = clientIsUsingV2;
    }

    /**
     * Aborts the speech request.
@@ -31,4 +44,16 @@ abstract class AbstractSynthesisCallback implements SynthesisCallback {
     * Can be called from multiple threads.
     */
    abstract void stop();

    /**
     * Returns the status code to report when a request is stopped.
     *
     * Requests coming from the V2 interface get the dedicated
     * {@link TextToSpeechClient.Status#STOPPED} status; V1 requests get the
     * generic {@link TextToSpeech#ERROR}.
     *
     * This method should only be called on the synthesis thread,
     * while in {@link TextToSpeechService#onSynthesizeText}.
     *
     * @return the status code matching the client's API version
     */
    int errorCodeOnStop() {
        if (mClientIsUsingV2) {
            return TextToSpeechClient.Status.STOPPED;
        }
        return TextToSpeech.ERROR;
    }
}
+1 −1
Original line number Diff line number Diff line
@@ -43,7 +43,7 @@ class AudioPlaybackHandler {
            return;
        }

        item.stop(false);
        item.stop(TextToSpeechClient.Status.STOPPED);
    }

    public void enqueue(PlaybackQueueItem item) {
+4 −4
Original line number Diff line number Diff line
@@ -53,7 +53,7 @@ class AudioPlaybackQueueItem extends PlaybackQueueItem {
        dispatcher.dispatchOnStart();
        mPlayer = MediaPlayer.create(mContext, mUri);
        if (mPlayer == null) {
            dispatcher.dispatchOnError();
            dispatcher.dispatchOnError(TextToSpeechClient.Status.ERROR_OUTPUT);
            return;
        }

@@ -83,9 +83,9 @@ class AudioPlaybackQueueItem extends PlaybackQueueItem {
        }

        if (mFinished) {
            dispatcher.dispatchOnDone();
            dispatcher.dispatchOnSuccess();
        } else {
            dispatcher.dispatchOnError();
            dispatcher.dispatchOnStop();
        }
    }

@@ -99,7 +99,7 @@ class AudioPlaybackQueueItem extends PlaybackQueueItem {
    }

    @Override
    void stop(boolean isError) {
    void stop(int errorCode) {
        mDone.open();
    }
}
Loading