Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit e5b9dbe8 authored by Aleksandar Kiridzic's avatar Aleksandar Kiridzic
Browse files

Speech: Public API for the language detection and switching features

An onLanguageDetection callback method is added to the IRecognitionListener
AIDL interface. It is to be called when the language detection (and
switching) results are available. The callback can be invoked any number
of times, at any point between the beginning and the end of speech,
depending on the speech recognition service implementation.

The following entities are created for the language detection feature:
 - `android.speech.extra.ENABLE_LANGUAGE_DETECTION`
 and `android.speech.extra.LANGUAGE_DETECTION_ALLOWED_LANGUAGES`
 key entries for the RecognizerIntent extras;
 - `detected_language`, `language_detection_confidence_level`
 and `top_locale_alternatives` key entries
 for the SpeechRecognizer result Bundle;
 - Language detection confidence levels valued
 as `UNKNOWN`, `NOT_CONFIDENT`, `CONFIDENT` and `HIGHLY_CONFIDENT`.

The following entities are created for the language switch feature:
 - `android.speech.extra.ENABLE_LANGUAGE_SWITCH`
 and `android.speech.extra.LANGUAGE_SWITCH_ALLOWED_LANGUAGES`
 key entries for the RecognizerIntent extras;
 - Language switch sensitivity levels valued
 as `high_precision`, `balanced`, and `quick_response`;
 - `language_switch_result` key entry
 for the SpeechRecognizer result Bundle;
 - Language switch results valued
 as `NOT_ATTEMPTED`, `SUCCEEDED`, `FAILED` and `SKIPPED_NO_MODEL`.

Bug: 268317656
Bug: 268475583
Test: CTS
Change-Id: I52266b1676535812c9f036149bf3a46f2aaf2340
parent 15078cd0
Loading
Loading
Loading
Loading
+21 −0
Original line number Diff line number Diff line
@@ -41386,6 +41386,7 @@ package android.speech {
    method public void onEndOfSpeech();
    method public void onError(int);
    method public void onEvent(int, android.os.Bundle);
    method public default void onLanguageDetection(@NonNull android.os.Bundle);
    method public void onPartialResults(android.os.Bundle);
    method public void onReadyForSpeech(android.os.Bundle);
    method public void onResults(android.os.Bundle);
@@ -41443,6 +41444,7 @@ package android.speech {
    method public void error(int) throws android.os.RemoteException;
    method @NonNull public android.content.AttributionSource getCallingAttributionSource();
    method public int getCallingUid();
    method public void languageDetection(@NonNull android.os.Bundle);
    method public void partialResults(android.os.Bundle) throws android.os.RemoteException;
    method public void readyForSpeech(android.os.Bundle) throws android.os.RemoteException;
    method public void results(android.os.Bundle) throws android.os.RemoteException;
@@ -41500,10 +41502,14 @@ package android.speech {
    field public static final String EXTRA_CONFIDENCE_SCORES = "android.speech.extra.CONFIDENCE_SCORES";
    field public static final String EXTRA_ENABLE_BIASING_DEVICE_CONTEXT = "android.speech.extra.ENABLE_BIASING_DEVICE_CONTEXT";
    field public static final String EXTRA_ENABLE_FORMATTING = "android.speech.extra.ENABLE_FORMATTING";
    field public static final String EXTRA_ENABLE_LANGUAGE_DETECTION = "android.speech.extra.ENABLE_LANGUAGE_DETECTION";
    field public static final String EXTRA_ENABLE_LANGUAGE_SWITCH = "android.speech.extra.ENABLE_LANGUAGE_SWITCH";
    field public static final String EXTRA_HIDE_PARTIAL_TRAILING_PUNCTUATION = "android.speech.extra.HIDE_PARTIAL_TRAILING_PUNCTUATION";
    field public static final String EXTRA_LANGUAGE = "android.speech.extra.LANGUAGE";
    field public static final String EXTRA_LANGUAGE_DETECTION_ALLOWED_LANGUAGES = "android.speech.extra.LANGUAGE_DETECTION_ALLOWED_LANGUAGES";
    field public static final String EXTRA_LANGUAGE_MODEL = "android.speech.extra.LANGUAGE_MODEL";
    field public static final String EXTRA_LANGUAGE_PREFERENCE = "android.speech.extra.LANGUAGE_PREFERENCE";
    field public static final String EXTRA_LANGUAGE_SWITCH_ALLOWED_LANGUAGES = "android.speech.extra.LANGUAGE_SWITCH_ALLOWED_LANGUAGES";
    field public static final String EXTRA_MASK_OFFENSIVE_WORDS = "android.speech.extra.MASK_OFFENSIVE_WORDS";
    field public static final String EXTRA_MAX_RESULTS = "android.speech.extra.MAX_RESULTS";
    field public static final String EXTRA_ONLY_RETURN_LANGUAGE_PREFERENCE = "android.speech.extra.ONLY_RETURN_LANGUAGE_PREFERENCE";
@@ -41527,6 +41533,9 @@ package android.speech {
    field public static final String FORMATTING_OPTIMIZE_QUALITY = "quality";
    field public static final String LANGUAGE_MODEL_FREE_FORM = "free_form";
    field public static final String LANGUAGE_MODEL_WEB_SEARCH = "web_search";
    field public static final String LANGUAGE_SWITCH_BALANCED = "balanced";
    field public static final String LANGUAGE_SWITCH_HIGH_PRECISION = "high_precision";
    field public static final String LANGUAGE_SWITCH_QUICK_RESPONSE = "quick_response";
    field public static final int RESULT_AUDIO_ERROR = 5; // 0x5
    field public static final int RESULT_CLIENT_ERROR = 2; // 0x2
    field public static final int RESULT_NETWORK_ERROR = 4; // 0x4
@@ -41560,6 +41569,7 @@ package android.speech {
    method @MainThread public void stopListening();
    method public void triggerModelDownload(@NonNull android.content.Intent);
    field public static final String CONFIDENCE_SCORES = "confidence_scores";
    field public static final String DETECTED_LANGUAGE = "detected_language";
    field public static final int ERROR_AUDIO = 3; // 0x3
    field public static final int ERROR_CANNOT_CHECK_SUPPORT = 14; // 0xe
    field public static final int ERROR_CANNOT_LISTEN_TO_DOWNLOAD_EVENTS = 15; // 0xf
@@ -41575,9 +41585,20 @@ package android.speech {
    field public static final int ERROR_SERVER_DISCONNECTED = 11; // 0xb
    field public static final int ERROR_SPEECH_TIMEOUT = 6; // 0x6
    field public static final int ERROR_TOO_MANY_REQUESTS = 10; // 0xa
    field public static final String LANGUAGE_DETECTION_CONFIDENCE_LEVEL = "language_detection_confidence_level";
    field public static final int LANGUAGE_DETECTION_CONFIDENCE_LEVEL_CONFIDENT = 2; // 0x2
    field public static final int LANGUAGE_DETECTION_CONFIDENCE_LEVEL_HIGHLY_CONFIDENT = 3; // 0x3
    field public static final int LANGUAGE_DETECTION_CONFIDENCE_LEVEL_NOT_CONFIDENT = 1; // 0x1
    field public static final int LANGUAGE_DETECTION_CONFIDENCE_LEVEL_UNKNOWN = 0; // 0x0
    field public static final String LANGUAGE_SWITCH_RESULT = "language_switch_result";
    field public static final int LANGUAGE_SWITCH_RESULT_FAILED = 2; // 0x2
    field public static final int LANGUAGE_SWITCH_RESULT_NOT_ATTEMPTED = 0; // 0x0
    field public static final int LANGUAGE_SWITCH_RESULT_SKIPPED_NO_MODEL = 3; // 0x3
    field public static final int LANGUAGE_SWITCH_RESULT_SUCCEEDED = 1; // 0x1
    field public static final String RECOGNITION_PARTS = "recognition_parts";
    field public static final String RESULTS_ALTERNATIVES = "results_alternatives";
    field public static final String RESULTS_RECOGNITION = "results_recognition";
    field public static final String TOP_LOCALE_ALTERNATIVES = "top_locale_alternatives";
  }
}
+10 −0
Original line number Diff line number Diff line
@@ -95,6 +95,16 @@ oneway interface IRecognitionListener {
     */
    void onEndOfSegmentedSession();

    /**
     * Called when the language detection (and switching) results are available.
     * This callback can be invoked any number of times between the beginning and the end
     * of speech, depending on the speech recognition service implementation.
     *
     * @param results a Bundle containing the identifier of the most confidently detected
     *        language, the confidence level of the detection, the alternative locales for
     *        the most confidently detected language, and the results of the language switching.
     */
    void onLanguageDetection(in Bundle results);

    /**
     * Reserved for adding future events.
     *
+30 −0
Original line number Diff line number Diff line
@@ -116,6 +116,36 @@ public interface RecognitionListener {
     */
    default void onEndOfSegmentedSession() {}

    /**
     * Reports language detection (and switching) results once they are available. Depending
     * on the speech recognition service implementation, this callback may be invoked any
     * number of times, at any point between {@link #onBeginningOfSpeech()} and
     * {@link #onEndOfSpeech()}.
     *
     * <p> Language detection is requested with
     * {@link RecognizerIntent#EXTRA_ENABLE_LANGUAGE_DETECTION}.
     * <p> Automatic language switching is requested with
     * {@link RecognizerIntent#EXTRA_ENABLE_LANGUAGE_SWITCH}.
     *
     * @param results the returned language detection (and switching) results, read as follows:
     *        <ul>
     *        <li> {@link Bundle#getString(String)} with
     *        {@link SpeechRecognizer#DETECTED_LANGUAGE} yields the IETF tag
     *        (as defined by BCP 47, e.g., "en-US", "de-DE") of the most confidently
     *        detected language.
     *        <li> {@link Bundle#getInt(String)} with
     *        {@link SpeechRecognizer#LANGUAGE_DETECTION_CONFIDENCE_LEVEL} yields the
     *        detection confidence level, one of the values prefixed by
     *        {@code LANGUAGE_DETECTION_CONFIDENCE_LEVEL_} in {@link SpeechRecognizer}.
     *        <li> {@link Bundle#getStringArrayList(String)} with
     *        {@link SpeechRecognizer#TOP_LOCALE_ALTERNATIVES} yields the alternative locales
     *        for the same language as reported under
     *        {@link SpeechRecognizer#DETECTED_LANGUAGE}.
     *        <li> {@link Bundle#getInt(String)} with
     *        {@link SpeechRecognizer#LANGUAGE_SWITCH_RESULT} yields the language switching
     *        result, one of the values prefixed by {@code LANGUAGE_SWITCH_RESULT_}
     *        in {@link SpeechRecognizer}.
     *        </ul>
     */
    default void onLanguageDetection(@NonNull Bundle results) {
        // Intentionally a no-op: implementations override this only if they need the results.
    }

    /**
     * Reserved for adding future events.
     * 
+33 −0
Original line number Diff line number Diff line
@@ -650,6 +650,39 @@ public abstract class RecognitionService extends Service {
            mListener.onEndOfSegmentedSession();
        }

        /**
         * Forwards language detection (and switching) results to the recognition listener.
         * The service should call this method whenever such results become available; it may
         * do so any number of times, at any point between {@link #beginningOfSpeech()} and
         * {@link #endOfSpeech()}, depending on the speech recognition service implementation.
         *
         * @param results the language detection (and switching) results to deliver, laid out
         *        as follows:
         *        <ul>
         *        <li> {@link SpeechRecognizer#DETECTED_LANGUAGE}: the IETF tag
         *        (as defined by BCP 47, e.g., "en-US", "de-DE") of the most confidently
         *        detected language, retrievable with {@link Bundle#getString(String)}.
         *        <li> {@link SpeechRecognizer#LANGUAGE_DETECTION_CONFIDENCE_LEVEL}: the
         *        detection confidence level, one of the values prefixed by
         *        {@code LANGUAGE_DETECTION_CONFIDENCE_LEVEL_} in {@link SpeechRecognizer},
         *        retrievable with {@link Bundle#getInt(String)}.
         *        <li> {@link SpeechRecognizer#TOP_LOCALE_ALTERNATIVES}: the alternative
         *        locales for the same language as reported under
         *        {@link SpeechRecognizer#DETECTED_LANGUAGE}, retrievable with
         *        {@link Bundle#getStringArrayList(String)}.
         *        <li> {@link SpeechRecognizer#LANGUAGE_SWITCH_RESULT}: the language switching
         *        result, one of the values prefixed by {@code LANGUAGE_SWITCH_RESULT_}
         *        in {@link SpeechRecognizer}, retrievable with {@link Bundle#getInt(String)}.
         *        </ul>
         */
        @SuppressLint("CallbackMethodName") // For consistency with existing methods.
        public void languageDetection(@NonNull Bundle results) {
            try {
                mListener.onLanguageDetection(results);
            } catch (RemoteException remoteException) {
                // This method declares no checked exception, so the failure is rethrown
                // through rethrowFromSystemServer() instead of being propagated as-is.
                throw remoteException.rethrowFromSystemServer();
            }
        }

        /**
         * Return the Linux uid assigned to the process that sent you the current transaction that
         * is being processed. This is obtained from {@link Binder#getCallingUid()}.
+81 −0
Original line number Diff line number Diff line
@@ -572,4 +572,85 @@ public class RecognizerIntent {
     */
    public static final String EXTRA_REQUEST_WORD_CONFIDENCE =
            "android.speech.extra.REQUEST_WORD_CONFIDENCE";

    /**
     * Optional boolean indicating whether to enable language detection. When enabled, the
     * recognizer will consistently identify the language of the current spoken utterance and
     * report it via {@link RecognitionListener#onLanguageDetection(Bundle)}.
     *
     * <p> Depending on the recognizer implementation, this flag may have no effect.
     *
     * @see #EXTRA_LANGUAGE_DETECTION_ALLOWED_LANGUAGES
     * @see #EXTRA_ENABLE_LANGUAGE_SWITCH
     */
    public static final String EXTRA_ENABLE_LANGUAGE_DETECTION =
            "android.speech.extra.ENABLE_LANGUAGE_DETECTION";

    /**
     * Optional list of IETF language tags (as defined by BCP 47, e.g. "en-US", "de-DE").
     * This extra is to be used together with {@link #EXTRA_ENABLE_LANGUAGE_DETECTION}.
     * If set, the recognizer will constrain the language detection output
     * to this list of languages, potentially improving detection accuracy.
     *
     * @see #EXTRA_ENABLE_LANGUAGE_DETECTION
     */
    public static final String EXTRA_LANGUAGE_DETECTION_ALLOWED_LANGUAGES =
            "android.speech.extra.LANGUAGE_DETECTION_ALLOWED_LANGUAGES";

    /**
     * Optional string that enables automatic switching to the language being spoken, using
     * the desired sensitivity level, instead of restricting recognition to a single language.
     * The corresponding language models must be downloaded to support the switch.
     * Otherwise, the recognizer will report an error on a switch failure. The recognizer
     * provides the switch results via {@link RecognitionListener#onLanguageDetection(Bundle)},
     * under the key {@link SpeechRecognizer#LANGUAGE_SWITCH_RESULT}.
     *
     * <p> Since detection is a prerequisite for language switching,
     * setting this value implicitly enables {@link #EXTRA_ENABLE_LANGUAGE_DETECTION}.
     *
     * <p> Depending on the recognizer implementation, this value may have no effect.
     *
     * @see #LANGUAGE_SWITCH_HIGH_PRECISION
     * @see #LANGUAGE_SWITCH_BALANCED
     * @see #LANGUAGE_SWITCH_QUICK_RESPONSE
     * @see #EXTRA_LANGUAGE_SWITCH_ALLOWED_LANGUAGES
     */
    public static final String EXTRA_ENABLE_LANGUAGE_SWITCH =
            "android.speech.extra.ENABLE_LANGUAGE_SWITCH";

    /**
     * A value to use for {@link #EXTRA_ENABLE_LANGUAGE_SWITCH}.
     *
     * <p> Enables the language switch only when a new language is detected with the
     * {@link SpeechRecognizer#LANGUAGE_DETECTION_CONFIDENCE_LEVEL_HIGHLY_CONFIDENT}
     * confidence level, which means the service may wait longer before switching.
     *
     * @see #EXTRA_ENABLE_LANGUAGE_SWITCH
     */
    public static final String LANGUAGE_SWITCH_HIGH_PRECISION = "high_precision";

    /**
     * A value to use for {@link #EXTRA_ENABLE_LANGUAGE_SWITCH}.
     *
     * <p> Enables the language switch only when a new language is detected with at least the
     * {@link SpeechRecognizer#LANGUAGE_DETECTION_CONFIDENCE_LEVEL_CONFIDENT} confidence level,
     * which means the service balances detecting a new language confidently against
     * switching early.
     *
     * @see #EXTRA_ENABLE_LANGUAGE_SWITCH
     */
    public static final String LANGUAGE_SWITCH_BALANCED = "balanced";

    /**
     * A value to use for {@link #EXTRA_ENABLE_LANGUAGE_SWITCH}.
     *
     * <p> Enables the language switch only when a new language is detected with at least the
     * {@link SpeechRecognizer#LANGUAGE_DETECTION_CONFIDENCE_LEVEL_NOT_CONFIDENT} confidence
     * level, which means the service should switch at the earliest moment possible.
     *
     * @see #EXTRA_ENABLE_LANGUAGE_SWITCH
     */
    public static final String LANGUAGE_SWITCH_QUICK_RESPONSE = "quick_response";

    /**
     * Optional list of IETF language tags (as defined by BCP 47, e.g. "en-US", "de-DE"). This extra
     * is to be used together with {@link #EXTRA_ENABLE_LANGUAGE_SWITCH}. If set, the recognizer
     * will apply the automatic switch only to these languages, even if the speech models of other
     * languages also exist. The corresponding language models must be downloaded to support the
     * switch. Otherwise, the recognizer will report an error on a switch failure.
     *
     * @see #EXTRA_ENABLE_LANGUAGE_SWITCH
     */
    public static final String EXTRA_LANGUAGE_SWITCH_ALLOWED_LANGUAGES =
            "android.speech.extra.LANGUAGE_SWITCH_ALLOWED_LANGUAGES";
}
Loading