Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit ad6df74a authored by Przemyslaw Szczepaniak's avatar Przemyslaw Szczepaniak
Browse files

Add support for voices in TTS API.

Voices allow to expose multiple backends/voice packs for a single
Locale. This is an attempt to port this feature from V2 API.

Bug: 15834470
Change-Id: I0117de238cfcf028bcec5344b8d65c960b96b98c
parent 96aacd2a
Loading
Loading
Loading
Loading
+42 −6
Original line number Diff line number Diff line
@@ -26609,6 +26609,7 @@ package android.speech.tts {
    method public int getSpeechRate();
    method public deprecated java.lang.String getText();
    method public java.lang.String getVariant();
    method public java.lang.String getVoiceName();
  }
  public class TextToSpeech {
@@ -26620,13 +26621,17 @@ package android.speech.tts {
    method public int addSpeech(java.lang.CharSequence, java.lang.String, int);
    method public int addSpeech(java.lang.String, java.lang.String);
    method public int addSpeech(java.lang.CharSequence, java.lang.String);
    method public boolean areDefaultsEnforced();
    method public deprecated boolean areDefaultsEnforced();
    method public java.util.Set<java.util.Locale> getAvailableLanguages();
    method public java.lang.String getDefaultEngine();
    method public java.util.Locale getDefaultLanguage();
    method public deprecated java.util.Locale getDefaultLanguage();
    method public android.speech.tts.Voice getDefaultVoice();
    method public java.util.List<android.speech.tts.TextToSpeech.EngineInfo> getEngines();
    method public java.util.Set<java.lang.String> getFeatures(java.util.Locale);
    method public java.util.Locale getLanguage();
    method public deprecated java.util.Set<java.lang.String> getFeatures(java.util.Locale);
    method public deprecated java.util.Locale getLanguage();
    method public static int getMaxSpeechInputLength();
    method public android.speech.tts.Voice getVoice();
    method public java.util.Set<android.speech.tts.Voice> getVoices();
    method public int isLanguageAvailable(java.util.Locale);
    method public boolean isSpeaking();
    method public int playEarcon(java.lang.String, int, java.util.HashMap<java.lang.String, java.lang.String>, java.lang.String);
@@ -26639,6 +26644,7 @@ package android.speech.tts {
    method public int setOnUtteranceProgressListener(android.speech.tts.UtteranceProgressListener);
    method public int setPitch(float);
    method public int setSpeechRate(float);
    method public int setVoice(android.speech.tts.Voice);
    method public void shutdown();
    method public int speak(java.lang.CharSequence, int, java.util.HashMap<java.lang.String, java.lang.String>, java.lang.String);
    method public deprecated int speak(java.lang.String, int, java.util.HashMap<java.lang.String, java.lang.String>);
@@ -26650,6 +26656,7 @@ package android.speech.tts {
    field public static final int ERROR_INVALID_REQUEST = -8; // 0xfffffff8
    field public static final int ERROR_NETWORK = -6; // 0xfffffffa
    field public static final int ERROR_NETWORK_TIMEOUT = -7; // 0xfffffff9
    field public static final int ERROR_NOT_INSTALLED_YET = -9; // 0xfffffff7
    field public static final int ERROR_OUTPUT = -5; // 0xfffffffb
    field public static final int ERROR_SERVICE = -4; // 0xfffffffc
    field public static final int ERROR_SYNTHESIS = -3; // 0xfffffffd
@@ -26685,8 +26692,11 @@ package android.speech.tts {
    field public static final deprecated java.lang.String EXTRA_VOICE_DATA_FILES_INFO = "dataFilesInfo";
    field public static final deprecated java.lang.String EXTRA_VOICE_DATA_ROOT_DIRECTORY = "dataRoot";
    field public static final java.lang.String INTENT_ACTION_TTS_SERVICE = "android.intent.action.TTS_SERVICE";
    field public static final java.lang.String KEY_FEATURE_EMBEDDED_SYNTHESIS = "embeddedTts";
    field public static final java.lang.String KEY_FEATURE_NETWORK_SYNTHESIS = "networkTts";
    field public static final deprecated java.lang.String KEY_FEATURE_EMBEDDED_SYNTHESIS = "embeddedTts";
    field public static final java.lang.String KEY_FEATURE_NETWORK_RETRIES_COUNT = "networkRetriesCount";
    field public static final deprecated java.lang.String KEY_FEATURE_NETWORK_SYNTHESIS = "networkTts";
    field public static final java.lang.String KEY_FEATURE_NETWORK_TIMEOUT_MS = "networkTimeoutMs";
    field public static final java.lang.String KEY_FEATURE_NOT_INSTALLED = "notInstalled";
    field public static final java.lang.String KEY_PARAM_PAN = "pan";
    field public static final java.lang.String KEY_PARAM_SESSION_ID = "sessionId";
    field public static final java.lang.String KEY_PARAM_STREAM = "streamType";
@@ -26712,11 +26722,15 @@ package android.speech.tts {
  public abstract class TextToSpeechService extends android.app.Service {
    ctor public TextToSpeechService();
    method protected int isValidVoiceName(java.lang.String);
    method public android.os.IBinder onBind(android.content.Intent);
    method protected java.lang.String onGetDefaultVoiceNameFor(java.lang.String, java.lang.String, java.lang.String);
    method protected java.util.Set<java.lang.String> onGetFeaturesForLanguage(java.lang.String, java.lang.String, java.lang.String);
    method protected abstract java.lang.String[] onGetLanguage();
    method protected java.util.List<android.speech.tts.Voice> onGetVoices();
    method protected abstract int onIsLanguageAvailable(java.lang.String, java.lang.String, java.lang.String);
    method protected abstract int onLoadLanguage(java.lang.String, java.lang.String, java.lang.String);
    method protected int onLoadVoice(java.lang.String);
    method protected abstract void onStop();
    method protected abstract void onSynthesizeText(android.speech.tts.SynthesisRequest, android.speech.tts.SynthesisCallback);
  }
@@ -26808,6 +26822,28 @@ package android.speech.tts {
    method public abstract void onStart(java.lang.String);
  }
  public class Voice implements android.os.Parcelable {
    ctor public Voice(java.lang.String, java.util.Locale, int, int, boolean, java.util.Set<java.lang.String>);
    method public int describeContents();
    method public java.util.Set<java.lang.String> getFeatures();
    method public int getLatency();
    method public java.util.Locale getLocale();
    method public java.lang.String getName();
    method public int getQuality();
    method public boolean getRequiresNetworkConnection();
    method public void writeToParcel(android.os.Parcel, int);
    field public static final int LATENCY_HIGH = 400; // 0x190
    field public static final int LATENCY_LOW = 200; // 0xc8
    field public static final int LATENCY_NORMAL = 300; // 0x12c
    field public static final int LATENCY_VERY_HIGH = 500; // 0x1f4
    field public static final int LATENCY_VERY_LOW = 100; // 0x64
    field public static final int QUALITY_HIGH = 400; // 0x190
    field public static final int QUALITY_LOW = 200; // 0xc8
    field public static final int QUALITY_NORMAL = 300; // 0x12c
    field public static final int QUALITY_VERY_HIGH = 500; // 0x1f4
    field public static final int QUALITY_VERY_LOW = 100; // 0x64
  }
}
package android.system {
+34 −0
Original line number Diff line number Diff line
@@ -20,6 +20,7 @@ import android.net.Uri;
import android.os.Bundle;
import android.os.ParcelFileDescriptor;
import android.speech.tts.ITextToSpeechCallback;
import android.speech.tts.Voice;

/**
 * Interface for TextToSpeech to talk to TextToSpeechService.
@@ -173,4 +174,37 @@ interface ITextToSpeechService {
     * @param cb The callback.
     */
    void setCallback(in IBinder caller, ITextToSpeechCallback cb);

    /**
     * Get the list of available voices.
     */
    List<Voice> getVoices();

    /**
     * Notifies the engine that it should load a speech synthesis voice.
     *
     * @param caller a binder representing the identity of the calling
     *        TextToSpeech object.
     * @param voiceName Unique name of the voice.
     * @return {@link TextToSpeech#SUCCESS} or {@link TextToSpeech#ERROR}.
     */
    int loadVoice(in IBinder caller, in String voiceName);

    /**
     * Return a name of the default voice for a given locale.
     *
     * This allows {@link TextToSpeech#getVoice} to return a sensible value after a client calls
     * {@link TextToSpeech#setLanguage}.
     *
     * @param lang ISO 3-character language code.
     * @param country ISO 3-character country code. May be empty or null.
     * @param variant Language variant. May be empty or null.
     * @return The name of the default voice for the given locale, or null if the locale is
     *         not supported.
     */
    String getDefaultVoiceNameFor(in String lang, in String country, in String variant);
}
+19 −1
Original line number Diff line number Diff line
@@ -24,6 +24,9 @@ import android.os.Bundle;
 *   <li>The synthesis locale, represented as a language, country and a variant.
 *   The language is an ISO 639-3 letter language code, and the country is an
 *   ISO 3166 alpha 3 code. The variant is not specified.</li>
 *   <li>The name of the voice requested for this synthesis. May be empty if
 *   the client uses {@link TextToSpeech#setLanguage} instead of
 *   {@link TextToSpeech#setVoice}</li>
 *   <li>The synthesis speech rate, with 100 being the normal, and
 *   higher values representing higher speech rates.</li>
 *   <li>The voice pitch, with 100 being the default pitch.</li>
@@ -36,6 +39,7 @@ import android.os.Bundle;
public final class SynthesisRequest {
    private final CharSequence mText;
    private final Bundle mParams;
    private String mVoiceName;
    private String mLanguage;
    private String mCountry;
    private String mVariant;
@@ -71,6 +75,13 @@ public final class SynthesisRequest {
        return mText;
    }

    /**
     * Gets the name of the voice to use.
     */
    public String getVoiceName() {
        // Plain accessor: hands back whatever setVoiceName() last stored for this request.
        return this.mVoiceName;
    }

    /**
     * Gets the ISO 3-letter language code for the language to use.
     */
@@ -129,6 +140,13 @@ public final class SynthesisRequest {
        mVariant = variant;
    }

    /**
     * Sets the voice name for the request.
     */
    void setVoiceName(String voiceName) {
        // Package-private mutator; the value is stored as given, without validation.
        this.mVoiceName = voiceName;
    }

    /**
     * Sets the speech rate.
     */
+283 −13

File changed.

Preview size limit exceeded, changes collapsed.

+254 −1
Original line number Diff line number Diff line
@@ -39,6 +39,7 @@ import android.util.Log;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
@@ -49,7 +50,7 @@ import java.util.Set;

/**
 * Abstract base class for TTS engine implementations. The following methods
 * need to be implemented for V1 API ({@link TextToSpeech}) implementation.
 * need to be implemented:
 * <ul>
 * <li>{@link #onIsLanguageAvailable}</li>
 * <li>{@link #onLoadLanguage}</li>
@@ -76,6 +77,29 @@ import java.util.Set;
 *
 * {@link #onGetLanguage} is not required as of JELLYBEAN_MR2 (API 18) and later, it is only
 * called on earlier versions of Android.
 *
 * API Level 20 adds support for Voice objects. Voices are an abstraction that allows the TTS
 * service to expose multiple backends for a single locale. Each one of them can have a different
 * feature set. In order to fully take advantage of voices, an engine should implement
 * the following methods:
 * <ul>
 * <li>{@link #onGetVoices()}</li>
 * <li>{@link #isValidVoiceName(String)}</li>
 * <li>{@link #onLoadVoice(String)}</li>
 * <li>{@link #onGetDefaultVoiceNameFor(String, String, String)}</li>
 * </ul>
 * The first three methods are siblings of the {@link #onGetLanguage},
 * {@link #onIsLanguageAvailable} and {@link #onLoadLanguage} methods. The last one,
 * {@link #onGetDefaultVoiceNameFor(String, String, String)} is a link between locale and voice
 * based methods. Since API level 20 {@link TextToSpeech#setLanguage} is implemented by
 * calling {@link TextToSpeech#setVoice} with the voice returned by
 * {@link #onGetDefaultVoiceNameFor(String, String, String)}.
 *
 * If the client uses a voice instead of a locale, {@link SynthesisRequest} will contain the
 * requested voice name.
 *
 * The default implementations of Voice-related methods implement them using the
 * pre-existing locale-based implementation.
 */
public abstract class TextToSpeechService extends Service {

@@ -228,6 +252,160 @@ public abstract class TextToSpeechService extends Service {
        return null;
    }

    private int getExpectedLanguageAvailableStatus(Locale locale) {
        // A non-empty variant always maps to the most specific status; the country is not
        // consulted in that case.
        if (!locale.getVariant().isEmpty()) {
            return TextToSpeech.LANG_COUNTRY_VAR_AVAILABLE;
        }
        // No variant: the presence of a country selects between the two remaining statuses.
        return locale.getCountry().isEmpty()
                ? TextToSpeech.LANG_AVAILABLE
                : TextToSpeech.LANG_COUNTRY_AVAILABLE;
    }

    /**
     * Queries the service for a set of supported voices.
     *
     * Can be called on multiple threads.
     *
     * The default implementation tries to enumerate all available locales, pass them to
     * {@link #onIsLanguageAvailable(String, String, String)} and create Voice instances (using
     * the locale's BCP-47 language tag as the voice name) for the ones that are supported.
     * Note that this implementation is suitable only for engines that don't have multiple voices
     * for a single locale. Also, this implementation won't work with Locales not listed in the
     * set returned by the {@link Locale#getAvailableLocales()} method.
     *
     * @return A list of voices supported.
     */
    protected List<Voice> onGetVoices() {
        // Probe every locale the platform knows about and keep the ones the engine supports
        // at exactly the level of detail the locale itself carries.
        ArrayList<Voice> supported = new ArrayList<Voice>();
        for (Locale candidate : Locale.getAvailableLocales()) {
            int wantedStatus = getExpectedLanguageAvailableStatus(candidate);
            String iso3Language;
            String iso3Country;
            String variant = candidate.getVariant();
            try {
                iso3Language = candidate.getISO3Language();
                iso3Country = candidate.getISO3Country();
                int reportedStatus = onIsLanguageAvailable(iso3Language, iso3Country, variant);
                if (reportedStatus != wantedStatus) {
                    continue;
                }
            } catch (MissingResourceException e) {
                // Locales without ISO-3 codes cannot be queried; skip them
                continue;
            }
            Set<String> features = onGetFeaturesForLanguage(iso3Language, iso3Country, variant);
            // The BCP-47 tag of the locale doubles as the voice name.
            Voice voice = new Voice(candidate.toLanguageTag(), candidate, Voice.QUALITY_NORMAL,
                    Voice.LATENCY_NORMAL, false, features);
            supported.add(voice);
        }
        return supported;
    }

    /**
     * Return a name of the default voice for a given locale.
     *
     * This method provides a mapping between locales and available voices. This method is
     * used in {@link TextToSpeech#setLanguage}, which calls this method and then calls
     * {@link TextToSpeech#setVoice} with the voice returned by this method.
     *
     * Also, it's used by {@link TextToSpeech#getDefaultVoice()} to find a default voice for
     * the default locale.
     *
     * @param lang ISO-3 language code.
     * @param country ISO-3 country code. May be empty or null.
     * @param variant Language variant. May be empty or null.
     *
     * @return A name of the default voice for a given locale.
     */
    protected String onGetDefaultVoiceNameFor(String lang, String country, String variant) {
        final int availability = onIsLanguageAvailable(lang, country, variant);

        // Keep only as many locale components as the engine reported support for; any other
        // status means there is no default voice to offer.
        final Locale iso3Locale;
        if (availability == TextToSpeech.LANG_AVAILABLE) {
            iso3Locale = new Locale(lang);
        } else if (availability == TextToSpeech.LANG_COUNTRY_AVAILABLE) {
            iso3Locale = new Locale(lang, country);
        } else if (availability == TextToSpeech.LANG_COUNTRY_VAR_AVAILABLE) {
            iso3Locale = new Locale(lang, country, variant);
        } else {
            return null;
        }

        // The language tag of the normalized locale is the candidate voice name; it is only
        // returned when the engine accepts it as a valid voice.
        final String candidateName = TtsEngines.normalizeTTSLocale(iso3Locale).toLanguageTag();
        return isValidVoiceName(candidateName) == TextToSpeech.SUCCESS ? candidateName : null;
    }

    /**
     * Notifies the engine that it should load a speech synthesis voice. There is no guarantee
     * that this method is always called before the voice is used for synthesis. It is merely
     * a hint to the engine that it will probably get some synthesis requests for this voice
     * at some point in the future.
     *
     * Will be called only on synthesis thread.
     *
     * The default implementation creates a Locale from the voice name (by interpreting the name as
     * a BCP-47 tag for the locale), and passes it to
     * {@link #onLoadLanguage(String, String, String)}.
     *
     * @param voiceName Name of the voice.
     * @return {@link TextToSpeech#ERROR} or {@link TextToSpeech#SUCCESS}.
     */
    protected int onLoadVoice(String voiceName) {
        // Locale.forLanguageTag() never returns null and throws NullPointerException for a
        // null tag, so a missing name has to be rejected before parsing rather than by
        // inspecting the parse result.
        if (voiceName == null) {
            return TextToSpeech.ERROR;
        }
        Locale locale = Locale.forLanguageTag(voiceName);
        int expectedStatus = getExpectedLanguageAvailableStatus(locale);
        try {
            // The ISO-3 getters throw MissingResourceException when no 3-letter code exists.
            String lang = locale.getISO3Language();
            String country = locale.getISO3Country();
            String variant = locale.getVariant();
            // Only load when the engine supports the locale at exactly the level of detail
            // encoded in the voice name.
            if (onIsLanguageAvailable(lang, country, variant) != expectedStatus) {
                return TextToSpeech.ERROR;
            }
            // The status returned by onLoadLanguage() is not inspected here.
            onLoadLanguage(lang, country, variant);
            return TextToSpeech.SUCCESS;
        } catch (MissingResourceException e) {
            return TextToSpeech.ERROR;
        }
    }

    /**
     * Checks whether the engine supports a voice with a given name.
     *
     * Can be called on multiple threads.
     *
     * The default implementation treats the voice name as a language tag, creating a Locale from
     * the voice name, and passes it to {@link #onIsLanguageAvailable(String, String, String)}.
     *
     * @param voiceName Name of the voice.
     * @return {@link TextToSpeech#ERROR} or {@link TextToSpeech#SUCCESS}.
     */
    protected int isValidVoiceName(String voiceName) {
        // Locale.forLanguageTag() never returns null and throws NullPointerException for a
        // null tag, so a missing name has to be rejected before parsing rather than by
        // inspecting the parse result.
        if (voiceName == null) {
            return TextToSpeech.ERROR;
        }
        Locale locale = Locale.forLanguageTag(voiceName);
        int expectedStatus = getExpectedLanguageAvailableStatus(locale);
        try {
            // The ISO-3 getters throw MissingResourceException when no 3-letter code exists.
            int localeStatus = onIsLanguageAvailable(locale.getISO3Language(),
                    locale.getISO3Country(), locale.getVariant());
            // Valid only when the engine supports the locale at exactly the level of detail
            // encoded in the voice name.
            return localeStatus == expectedStatus ? TextToSpeech.SUCCESS : TextToSpeech.ERROR;
        } catch (MissingResourceException e) {
            return TextToSpeech.ERROR;
        }
    }

    private int getDefaultSpeechRate() {
        return getSecureSettingInt(Settings.Secure.TTS_DEFAULT_RATE, Engine.DEFAULT_RATE);
    }
@@ -736,7 +914,11 @@ public abstract class TextToSpeechService extends Service {
        }

        private void setRequestParams(SynthesisRequest request) {
            String voiceName = getVoiceName();
            request.setLanguage(getLanguage(), getCountry(), getVariant());
            if (!TextUtils.isEmpty(voiceName)) {
                request.setVoiceName(getVoiceName());
            }
            request.setSpeechRate(getSpeechRate());
            request.setCallerUid(mCallerUid);
            request.setPitch(getPitch());
@@ -770,6 +952,10 @@ public abstract class TextToSpeechService extends Service {
        public String getLanguage() {
            // Falls back to the first entry of the default locale array when the request
            // parameters carry no language.
            final String fallbackLanguage = mDefaultLocale[0];
            return getStringParam(mParams, Engine.KEY_PARAM_LANGUAGE, fallbackLanguage);
        }

        public String getVoiceName() {
            // An empty string is the default when the request parameters name no voice.
            final String noVoiceRequested = "";
            return getStringParam(mParams, Engine.KEY_PARAM_VOICE_NAME, noVoiceRequested);
        }
    }

    private class SynthesisToFileOutputStreamSpeechItemV1 extends SynthesisSpeechItemV1 {
@@ -896,6 +1082,35 @@ public abstract class TextToSpeechService extends Service {
        }
    }

    /**
     * Call {@link TextToSpeechService#onLoadVoice} on synth thread.
     */
    private class LoadVoiceItem extends SpeechItem {
        // Name of the voice handed to onLoadVoice() when this item is played.
        private final String mVoiceName;

        public LoadVoiceItem(Object callerIdentity, int callerUid, int callerPid,
                String voiceName) {
            super(callerIdentity, callerUid, callerPid);
            this.mVoiceName = voiceName;
        }

        @Override
        protected void stopImpl() {
            // Nothing to do when stopped
        }

        @Override
        protected void playImpl() {
            // "Playing" this item only forwards the load hint to the engine; the result of
            // onLoadVoice() is not used.
            final String requestedVoice = mVoiceName;
            TextToSpeechService.this.onLoadVoice(requestedVoice);
        }

        @Override
        public boolean isValid() {
            // This item performs no validation of its own
            return true;
        }
    }


    @Override
    public IBinder onBind(Intent intent) {
        if (TextToSpeech.Engine.INTENT_ACTION_TTS_SERVICE.equals(intent.getAction())) {
@@ -1041,6 +1256,44 @@ public abstract class TextToSpeechService extends Service {
            return retVal;
        }

        @Override
        public List<Voice> getVoices() {
            // Straight delegation to the engine's overridable hook.
            final List<Voice> engineVoices = onGetVoices();
            return engineVoices;
        }

        @Override
        public int loadVoice(IBinder caller, String voiceName) {
            if (!checkNonNull(voiceName)) {
                return TextToSpeech.ERROR;
            }

            // An unknown voice is reported back with the validation result and nothing is
            // queued.
            final int validity = isValidVoiceName(voiceName);
            if (validity != TextToSpeech.SUCCESS) {
                return validity;
            }

            // The actual load is enqueued on the synthesis handler rather than run in this call.
            final SpeechItem loadRequest = new LoadVoiceItem(caller, Binder.getCallingUid(),
                    Binder.getCallingPid(), voiceName);
            final int enqueueResult =
                    mSynthHandler.enqueueSpeechItem(TextToSpeech.QUEUE_ADD, loadRequest);
            return enqueueResult == TextToSpeech.SUCCESS ? validity : TextToSpeech.ERROR;
        }

        public String getDefaultVoiceNameFor(String lang, String country, String variant) {
            if (!checkNonNull(lang)) {
                return null;
            }

            // Only ask the engine for a default voice when it reports some level of support
            // for the requested locale; every other status yields null.
            switch (onIsLanguageAvailable(lang, country, variant)) {
                case TextToSpeech.LANG_AVAILABLE:
                case TextToSpeech.LANG_COUNTRY_AVAILABLE:
                case TextToSpeech.LANG_COUNTRY_VAR_AVAILABLE:
                    return onGetDefaultVoiceNameFor(lang, country, variant);
                default:
                    return null;
            }
        }

        @Override
        public void setCallback(IBinder caller, ITextToSpeechCallback cb) {
            // Note that passing in a null callback is a valid use case.
Loading