Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit e6cd2476 authored by Sandeep Siddhartha's avatar Sandeep Siddhartha
Browse files

Add recognition modes to the enrollment metadata

This will be used by the voice interaction service to determine what type of recognition may be run
on the DSP. For example, if the DSP supports only voice trigger for the given keyphrase,
the voice interaction service may want to perform user identification on its own end.

Also support keyphrase metadata for all keyphrases and locales.
If the enrollment app supports open-ended keyphrases, it can leave the keyphrase text
empty.
Similarly, if the enrollment app supports all locales, it can leave the supported locales
attribute empty.

Change-Id: I782a17a877fc79ed569fa7c3a81697641182590b
parent 51344781
Loading
Loading
Loading
Loading
+13 −7
Original line number Diff line number Diff line
@@ -1061,6 +1061,7 @@ package android {
    field public static final int searchIcon = 16843909; // 0x1010485
    field public static final int searchKeyphrase = 16843873; // 0x1010461
    field public static final int searchKeyphraseId = 16843872; // 0x1010460
    field public static final int searchKeyphraseRecognitionFlags = 16843947; // 0x10104ab
    field public static final int searchKeyphraseSupportedLocales = 16843874; // 0x1010462
    field public static final int searchMode = 16843221; // 0x10101d5
    field public static final int searchSettingsDescription = 16843402; // 0x101028a
@@ -26957,7 +26958,8 @@ package android.service.voice {
    method public int getAvailability();
    method public android.content.Intent getManageIntent(int);
    method public int getRecognitionStatus();
    method public int startRecognition();
    method public int getSupportedRecognitionModes();
    method public int startRecognition(int);
    method public int stopRecognition();
    field public static final int KEYPHRASE_ENROLLED = 2; // 0x2
    field public static final int KEYPHRASE_HARDWARE_UNAVAILABLE = -2; // 0xfffffffe
@@ -26966,17 +26968,21 @@ package android.service.voice {
    field public static final int MANAGE_ACTION_ENROLL = 0; // 0x0
    field public static final int MANAGE_ACTION_RE_ENROLL = 1; // 0x1
    field public static final int MANAGE_ACTION_UN_ENROLL = 2; // 0x2
    field public static final int RECOGNITION_ACTIVE = 2; // 0x2
    field public static final int RECOGNITION_DISABLED_TEMPORARILY = -1; // 0xffffffff
    field public static final int RECOGNITION_NOT_AVAILABLE = -3; // 0xfffffffd
    field public static final int RECOGNITION_NOT_REQUESTED = -2; // 0xfffffffe
    field public static final int RECOGNITION_REQUESTED = 1; // 0x1
    field public static final int RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO = 1; // 0x1
    field public static final int RECOGNITION_FLAG_NONE = 0; // 0x0
    field public static final int RECOGNITION_MODE_USER_IDENTIFICATION = 2; // 0x2
    field public static final int RECOGNITION_MODE_VOICE_TRIGGER = 1; // 0x1
    field public static final int RECOGNITION_STATUS_ACTIVE = 16; // 0x10
    field public static final int RECOGNITION_STATUS_DISABLED_TEMPORARILY = 8; // 0x8
    field public static final int RECOGNITION_STATUS_NOT_AVAILABLE = 1; // 0x1
    field public static final int RECOGNITION_STATUS_NOT_REQUESTED = 2; // 0x2
    field public static final int RECOGNITION_STATUS_REQUESTED = 4; // 0x4
    field public static final int STATUS_ERROR = -2147483648; // 0x80000000
    field public static final int STATUS_OK = 1; // 0x1
  }
  public static abstract interface AlwaysOnHotwordDetector.Callback {
    method public abstract void onDetected();
    method public abstract void onDetected(byte[]);
    method public abstract void onDetectionStarted();
    method public abstract void onDetectionStopped();
  }
+8 −3
Original line number Diff line number Diff line
@@ -156,6 +156,9 @@ public class KeyphraseEnrollmentInfo {
            if (searchKeyphraseId != -1) {
                String searchKeyphrase = array.getString(com.android.internal.R.styleable
                        .VoiceEnrollmentApplication_searchKeyphrase);
                if (searchKeyphrase == null) {
                    searchKeyphrase = "";
                }
                String searchKeyphraseSupportedLocales =
                        array.getString(com.android.internal.R.styleable
                                .VoiceEnrollmentApplication_searchKeyphraseSupportedLocales);
@@ -165,9 +168,11 @@ public class KeyphraseEnrollmentInfo {
                        && !searchKeyphraseSupportedLocales.isEmpty()) {
                    supportedLocales = searchKeyphraseSupportedLocales.split(",");
                }
                int recognitionModes = array.getInt(com.android.internal.R.styleable
                        .VoiceEnrollmentApplication_searchKeyphraseRecognitionFlags, 0);
                mKeyphrases = new KeyphraseMetadata[1];
                mKeyphrases[0] = new KeyphraseMetadata(
                        searchKeyphraseId, searchKeyphrase, supportedLocales);
                        searchKeyphraseId, searchKeyphrase, supportedLocales, recognitionModes);
            } else {
                mParseError = "searchKeyphraseId not specified in meta-data";
                return;
@@ -239,8 +244,8 @@ public class KeyphraseEnrollmentInfo {
     * @param keyphrase The keyphrase that the user needs to be enrolled to.
     * @param locale The locale for which the enrollment needs to be performed.
     *        This is a Java locale, for example "en_US".
     * @return The metadata, if an enrollment client supports the given keyphrase
     *         and the given locale, null otherwise.
     * @return The metadata, if the enrollment client supports the given keyphrase
     *         and locale, null otherwise.
     */
    public KeyphraseMetadata getKeyphraseMetadata(String keyphrase, String locale) {
        if (mKeyphrases == null || mKeyphrases.length == 0) {
+8 −6
Original line number Diff line number Diff line
@@ -27,34 +27,36 @@ public class KeyphraseMetadata {
    public final int id;
    public final String keyphrase;
    public final ArraySet<String> supportedLocales;
    public final int recognitionModeFlags;

    public KeyphraseMetadata(int id, String keyphrase, String[] supportedLocales) {
    public KeyphraseMetadata(int id, String keyphrase, String[] supportedLocales,
            int recognitionModeFlags) {
        this.id = id;
        this.keyphrase = keyphrase;
        this.supportedLocales = new ArraySet<String>(supportedLocales.length);
        for (String locale : supportedLocales) {
            this.supportedLocales.add(locale);
        }
        this.recognitionModeFlags = recognitionModeFlags;
    }

    @Override
    public String toString() {
        return "id=" + id + ", keyphrase=" + keyphrase + ", supported-locales=" + supportedLocales;
        return "id=" + id + ", keyphrase=" + keyphrase + ", supported-locales=" + supportedLocales
                + ", recognition-modes=" + recognitionModeFlags;
    }

    /**
     * @return Indicates if we support the given phrase.
     */
    public boolean supportsPhrase(String phrase) {
        // TODO(sansid): Come up with a scheme for custom keyphrases that should always match.
        return keyphrase.equalsIgnoreCase(phrase);
        return keyphrase.isEmpty() || keyphrase.equalsIgnoreCase(phrase);
    }

    /**
     * @return Indicates if we support the given locale.
     */
    public boolean supportsLocale(String locale) {
        // TODO(sansid): Come up with a scheme for keyphrases that are available in all locales.
        return supportedLocales.contains(locale);
        return supportedLocales.isEmpty() || supportedLocales.contains(locale);
    }
}
+8 −3
Original line number Diff line number Diff line
@@ -65,8 +65,12 @@ public class SoundTriggerHelper implements SoundTrigger.StatusListener {
     * The callback for sound trigger events.
     */
    public interface Listener {
        /** Called when the given keyphrase is spoken. */
        void onKeyphraseSpoken();
        /**
         * Called when the given keyphrase is spoken.
         *
         * @param data The captured audio, may be null.
         */
        void onKeyphraseSpoken(byte[] data);

        /**
         * Called when the listening state for the given keyphrase changes.
@@ -226,7 +230,8 @@ public class SoundTriggerHelper implements SoundTrigger.StatusListener {

        switch (event.status) {
            case SoundTrigger.RECOGNITION_STATUS_SUCCESS:
                listener.onKeyphraseSpoken();
                // TODO: Pass the captured audio back.
                listener.onKeyphraseSpoken(null);
                break;
            case SoundTrigger.RECOGNITION_STATUS_ABORT:
                listener.onListeningStateChanged(STATE_STOPPED);
+74 −26
Original line number Diff line number Diff line
@@ -66,18 +66,42 @@ public class AlwaysOnHotwordDetector {
    public static final int MANAGE_ACTION_UN_ENROLL = 2;

    /**
     * Return codes for {@link #startRecognition()}, {@link #stopRecognition()}
     * Return codes for {@link #startRecognition(int)}, {@link #stopRecognition()}
     */
    public static final int STATUS_ERROR = Integer.MIN_VALUE;
    public static final int STATUS_OK = 1;

    //---- Keyphrase recognition status ----//
    // TODO: Figure out if they are exclusive or should be flags instead?
    public static final int RECOGNITION_NOT_AVAILABLE = -3;
    public static final int RECOGNITION_NOT_REQUESTED = -2;
    public static final int RECOGNITION_DISABLED_TEMPORARILY = -1;
    public static final int RECOGNITION_REQUESTED = 1;
    public static final int RECOGNITION_ACTIVE = 2;
    /** Indicates that recognition is not available. */
    public static final int RECOGNITION_STATUS_NOT_AVAILABLE = 0x01;
    /** Indicates that recognition has not been requested. */
    public static final int RECOGNITION_STATUS_NOT_REQUESTED = 0x02;
    /** Indicates that recognition has been requested. */
    public static final int RECOGNITION_STATUS_REQUESTED = 0x04;
    /** Indicates that recognition has been temporarily disabled. */
    public static final int RECOGNITION_STATUS_DISABLED_TEMPORARILY = 0x08;
    /** Indicates that recognition is currently active. */
    public static final int RECOGNITION_STATUS_ACTIVE = 0x10;

    //---- Flags for startRecognition ----//
    /** Empty flag for {@link #startRecognition(int)}. */
    public static final int RECOGNITION_FLAG_NONE = 0;
    /**
     * Recognition flag for {@link #startRecognition(int)} that indicates
     * whether the trigger audio for hotword needs to be captured.
     */
    public static final int RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO = 0x1;

    //---- Recognition mode flags ----//
    // Must be kept in sync with the related attribute defined as searchKeyphraseRecognitionFlags.

    /**
     * Simple recognition of the key phrase.
     * Returned by {@link #getSupportedRecognitionModes()}.
     */
    public static final int RECOGNITION_MODE_VOICE_TRIGGER
            = SoundTrigger.RECOGNITION_MODE_VOICE_TRIGGER;
    /**
     * Trigger only if one user is identified.
     * Returned by {@link #getSupportedRecognitionModes()}.
     */
    public static final int RECOGNITION_MODE_USER_IDENTIFICATION
            = SoundTrigger.RECOGNITION_MODE_USER_IDENTIFICATION;

    static final String TAG = "AlwaysOnHotwordDetector";

    private final String mText;
@@ -107,9 +131,11 @@ public class AlwaysOnHotwordDetector {
    public interface Callback {
        /**
         * Called when the keyphrase is spoken.
         * TODO: Add more data to the callback.
         *
         * @param data Optional trigger audio data, if it was requested during
         *        {@link AlwaysOnHotwordDetector#startRecognition(int)}.
         */
        void onDetected();
        void onDetected(byte[] data);
        /**
         * Called when the detection for the associated keyphrase starts.
         */
@@ -166,47 +192,63 @@ public class AlwaysOnHotwordDetector {
    }

    /**
     * Gets the status of the recognition.
     * @return One of {@link #RECOGNITION_NOT_AVAILABLE}, {@link #RECOGNITION_NOT_REQUESTED},
     *         {@link #RECOGNITION_DISABLED_TEMPORARILY} or {@link #RECOGNITION_ACTIVE}.
     * @throws UnsupportedOperationException if the recognition isn't supported.
     * Gets the recognition modes supported by the associated keyphrase.
     *
     * @throws UnsupportedOperationException if the keyphrase itself isn't supported.
     *         Callers should check the availability by calling {@link #getAvailability()}
     *         before calling this method to avoid this exception.
     */
    public int getRecognitionStatus() {
        if (mAvailability != KEYPHRASE_ENROLLED) {
    public int getSupportedRecognitionModes() {
        if (mAvailability == KEYPHRASE_HARDWARE_UNAVAILABLE
                || mAvailability == KEYPHRASE_UNSUPPORTED) {
            throw new UnsupportedOperationException(
                    "Recognition for the given keyphrase is not supported");
                    "Getting supported recognition modes for the keyphrase is not supported");
        }

        return mKeyphraseMetadata.recognitionModeFlags;
    }

    /**
     * Gets the current recognition status for the associated keyphrase.
     *
     * @return A bit mask composed of {@link #RECOGNITION_STATUS_NOT_AVAILABLE},
     *         {@link #RECOGNITION_STATUS_NOT_REQUESTED}, {@link #RECOGNITION_STATUS_REQUESTED},
     *         {@link #RECOGNITION_STATUS_DISABLED_TEMPORARILY} and
     *         {@link #RECOGNITION_STATUS_ACTIVE}.
     */
    public int getRecognitionStatus() {
        return mRecognitionState;
    }

    /**
     * Starts recognition for the associated keyphrase.
     *
     * @param recognitionFlags The flags to control the recognition properties.
     *        The allowed flags are {@link #RECOGNITION_FLAG_NONE} and
     *        {@link #RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO}.
     * @return One of {@link #STATUS_ERROR} or {@link #STATUS_OK}.
     * @throws UnsupportedOperationException if the recognition isn't supported.
     *         Callers should check the availability by calling {@link #getAvailability()}
     *         before calling this method to avoid this exception.
     */
    public int startRecognition() {
        if (mAvailability != KEYPHRASE_ENROLLED) {
    public int startRecognition(int recognitionFlags) {
        if (mAvailability != KEYPHRASE_ENROLLED
                || (mRecognitionState&RECOGNITION_STATUS_NOT_AVAILABLE) != 0) {
            throw new UnsupportedOperationException(
                    "Recognition for the given keyphrase is not supported");
        }

        mRecognitionState = RECOGNITION_REQUESTED;
        mRecognitionState = RECOGNITION_REQUESTED;
        mRecognitionState &= RECOGNITION_STATUS_REQUESTED;
        KeyphraseRecognitionExtra[] recognitionExtra = new KeyphraseRecognitionExtra[1];
        // TODO: Do we need to do something about the confidence level here?
        // TODO: Read the recognition mode flag from the KeyphraseMetadata.
        // TODO: Take in captureTriggerAudio as a method param here.
        recognitionExtra[0] = new KeyphraseRecognitionExtra(mKeyphraseMetadata.id,
                SoundTrigger.RECOGNITION_MODE_VOICE_TRIGGER, new ConfidenceLevel[0]);
                mKeyphraseMetadata.recognitionModeFlags, new ConfidenceLevel[0]);
        boolean captureTriggerAudio =
                (recognitionFlags & RECOGNITION_FLAG_CAPTURE_TRIGGER_AUDIO) != 0;
        int code = mSoundTriggerHelper.startRecognition(mKeyphraseMetadata.id,
                mEnrolledSoundModel.convertToSoundTriggerKeyphraseSoundModel(), mListener,
                new RecognitionConfig(false, recognitionExtra, null /* additional data */));
                new RecognitionConfig(
                        captureTriggerAudio, recognitionExtra,null /* additional data */));
        if (code != SoundTriggerHelper.STATUS_OK) {
            Slog.w(TAG, "startRecognition() failed with error code " + code);
            return STATUS_ERROR;
@@ -229,7 +271,7 @@ public class AlwaysOnHotwordDetector {
                    "Recognition for the given keyphrase is not supported");
        }

        mRecognitionState = RECOGNITION_NOT_REQUESTED;
        mRecognitionState &= ~RECOGNITION_STATUS_NOT_REQUESTED;
        int code = mSoundTriggerHelper.stopRecognition(mKeyphraseMetadata.id, mListener);

        if (code != SoundTriggerHelper.STATUS_OK) {
@@ -269,16 +311,21 @@ public class AlwaysOnHotwordDetector {
    /**
     * Works out the availability of the keyphrase and updates the recognition state to match.
     *
     * The checks run in a fixed order: DSP info present, keyphrase metadata present,
     * enrolled sound model present. The first one that fails decides the result, and the
     * recognition state is then reset to {@link #RECOGNITION_STATUS_NOT_AVAILABLE}.
     * When all checks pass, only the not-available bit is cleared and every other
     * state bit is left as it was.
     *
     * @return One of KEYPHRASE_HARDWARE_UNAVAILABLE, KEYPHRASE_UNSUPPORTED,
     *         KEYPHRASE_UNENROLLED or KEYPHRASE_ENROLLED.
     */
    private int internalGetAvailability() {
        final int unavailableReason;
        if (mSoundTriggerHelper.dspInfo == null) {
            // No DSP available
            unavailableReason = KEYPHRASE_HARDWARE_UNAVAILABLE;
        } else if (mKeyphraseMetadata == null) {
            // No enrollment application supports this keyphrase/locale
            unavailableReason = KEYPHRASE_UNSUPPORTED;
        } else if (mEnrolledSoundModel == null) {
            // This keyphrase hasn't been enrolled.
            unavailableReason = KEYPHRASE_UNENROLLED;
        } else {
            // Mark recognition as available
            mRecognitionState &= ~RECOGNITION_STATUS_NOT_AVAILABLE;
            return KEYPHRASE_ENROLLED;
        }

        // Every failure path above lands here with the same state reset.
        mRecognitionState = RECOGNITION_STATUS_NOT_AVAILABLE;
        return unavailableReason;
    }

@@ -320,14 +367,15 @@ public class AlwaysOnHotwordDetector {
        }

        @Override
        public void onKeyphraseSpoken() {
        public void onKeyphraseSpoken(byte[] data) {
            Slog.i(TAG, "onKeyphraseSpoken");
            mCallback.onDetected();
            mCallback.onDetected(data);
        }

        @Override
        public void onListeningStateChanged(int state) {
            Slog.i(TAG, "onListeningStateChanged: state=" + state);
            // TODO: Set/unset the RECOGNITION_STATUS_ACTIVE flag here.
            if (state == SoundTriggerHelper.STATE_STARTED) {
                mCallback.onDetectionStarted();
            } else if (state == SoundTriggerHelper.STATE_STOPPED) {
Loading