Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 1d87cf06 authored by Sergey Volnov's avatar Sergey Volnov
Browse files

Refactor audio flows so that server streams audio to the client.

This way the server has control over when to stop recording (in case clients
time out or don't start consuming bytes), and it solves the attribution
problem (the Voice Interaction Service only gets the capture session upon a
positive trigger, so it is only "blamed" from that point on).

Test: atest CtsVoiceInteractionTestCases
Bug: 168305377
Change-Id: If5bead87e88cfc9e31393029df5389afb6922183
parent 04d9bf62
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -10626,7 +10626,7 @@ package android.service.voice {
  public abstract class HotwordDetectionService extends android.app.Service {
    ctor public HotwordDetectionService();
    method @Nullable public final android.os.IBinder onBind(@NonNull android.content.Intent);
    method public void onDetectFromDspSource(int, @NonNull android.service.voice.HotwordDetectionService.DspHotwordDetectionCallback);
    method public void onDetectFromDspSource(@NonNull android.os.ParcelFileDescriptor, @NonNull android.media.AudioFormat, long, @NonNull android.service.voice.HotwordDetectionService.DspHotwordDetectionCallback);
    field public static final String SERVICE_INTERFACE = "android.service.voice.HotwordDetectionService";
  }
+24 −5
Original line number Diff line number Diff line
@@ -19,15 +19,18 @@ package android.service.voice;
import static com.android.internal.util.function.pooled.PooledLambda.obtainMessage;

import android.annotation.CallSuper;
import android.annotation.DurationMillisLong;
import android.annotation.NonNull;
import android.annotation.Nullable;
import android.annotation.SdkConstant;
import android.annotation.SystemApi;
import android.app.Service;
import android.content.Intent;
import android.media.AudioFormat;
import android.os.Handler;
import android.os.IBinder;
import android.os.Looper;
import android.os.ParcelFileDescriptor;
import android.os.RemoteException;
import android.util.Log;

@@ -57,14 +60,21 @@ public abstract class HotwordDetectionService extends Service {

    private final IHotwordDetectionService mInterface = new IHotwordDetectionService.Stub() {
        @Override
        public void detectFromDspSource(int sessionId, IDspHotwordDetectionCallback callback)
        public void detectFromDspSource(
                ParcelFileDescriptor audioStream,
                AudioFormat audioFormat,
                long timeoutMillis,
                IDspHotwordDetectionCallback callback)
                throws RemoteException {
            if (DBG) {
                Log.d(TAG, "#detectFromDspSource");
            }
            mHandler.sendMessage(obtainMessage(HotwordDetectionService::onDetectFromDspSource,
                    HotwordDetectionService.this,
                    sessionId, new DspHotwordDetectionCallback(callback)));
                    audioStream,
                    audioFormat,
                    timeoutMillis,
                    new DspHotwordDetectionCallback(callback)));
        }
    };

@@ -89,15 +99,24 @@ public abstract class HotwordDetectionService extends Service {
    /**
     * Detect the audio data generated from Dsp.
     *
     * @param sessionId The session to use when attempting to capture more audio from the DSP
     *                  hardware.
     * <p>Note: the clients are supposed to call {@code close} on the input stream when they are
     * done with the operation in order to free up resources.
     *
     * @param audioStream Stream containing audio bytes returned from DSP
     * @param audioFormat Format of the supplied audio
     * @param timeoutMillis Timeout in milliseconds for the operation to invoke the callback. If
     *                      the application fails to abide by the timeout, system will close the
     *                      microphone and cancel the operation.
     * @param callback Use {@link HotwordDetectionService.DspHotwordDetectionCallback} to return
     * the detected result.
     *
     * @hide
     */
    @SystemApi
    public void onDetectFromDspSource(int sessionId,
    public void onDetectFromDspSource(
            @NonNull ParcelFileDescriptor audioStream,
            @NonNull AudioFormat audioFormat,
            @DurationMillisLong long timeoutMillis,
            @NonNull DspHotwordDetectionCallback callback) {
    }

+7 −1
Original line number Diff line number Diff line
@@ -16,6 +16,8 @@

package android.service.voice;

import android.media.AudioFormat;
import android.os.ParcelFileDescriptor;
import android.service.voice.IDspHotwordDetectionCallback;

/**
@@ -24,5 +26,9 @@ import android.service.voice.IDspHotwordDetectionCallback;
 * @hide
 */
oneway interface IHotwordDetectionService {
    void detectFromDspSource(int sessionId, in IDspHotwordDetectionCallback callback);
    void detectFromDspSource(
    in ParcelFileDescriptor audioStream,
    in AudioFormat audioFormat,
    long timeoutMillis,
    in IDspHotwordDetectionCallback callback);
}
+154 −24
Original line number Diff line number Diff line
@@ -22,25 +22,49 @@ import android.content.Context;
import android.content.Intent;
import android.hardware.soundtrigger.IRecognitionStatusCallback;
import android.hardware.soundtrigger.SoundTrigger;
import android.media.AudioAttributes;
import android.media.AudioRecord;
import android.media.MediaRecorder;
import android.os.ParcelFileDescriptor;
import android.os.RemoteException;
import android.service.voice.AlwaysOnHotwordDetector;
import android.service.voice.HotwordDetectionService;
import android.service.voice.IDspHotwordDetectionCallback;
import android.service.voice.IHotwordDetectionService;
import android.util.Pair;
import android.util.Slog;

import com.android.internal.app.IHotwordRecognitionStatusCallback;
import com.android.internal.infra.ServiceConnector;

import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.util.concurrent.Executor;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.TimeUnit;

/**
 * A class that provides the communication with the HotwordDetectionService.
 */
final class HotwordDetectionConnection {
    static final String TAG = "HotwordDetectionConnection";
    private static final String TAG = "HotwordDetectionConnection";
    // TODO (b/177502877): Set the Debug flag to false before shipping.
    static final boolean DEBUG = true;
    private static final boolean DEBUG = true;

    // Number of bytes per sample of audio (which is a short).
    private static final int BYTES_PER_SAMPLE = 2;
    // TODO: These constants need to be refined.
    private static final long VALIDATION_TIMEOUT_MILLIS = 3000;
    private static final long VOICE_INTERACTION_TIMEOUT_TO_OPEN_MIC_MILLIS = 2000;
    private static final int MAX_STREAMING_SECONDS = 10;

    private final Executor mAudioCopyExecutor = Executors.newCachedThreadPool();
    // TODO: This may need to be a Handler(looper)
    private final ScheduledExecutorService mScheduledExecutorService =
            Executors.newSingleThreadScheduledExecutor();

    final Object mLock;
    final ComponentName mDetectionComponentName;
@@ -93,31 +117,75 @@ final class HotwordDetectionConnection {
        }
    }

    private void detectFromDspSource(int sessionId, IDspHotwordDetectionCallback callback) {
    private void detectFromDspSource(SoundTrigger.KeyphraseRecognitionEvent recognitionEvent,
            IHotwordRecognitionStatusCallback externalCallback) {
        if (DEBUG) {
            Slog.d(TAG, "detectFromDspSource");
        }
        mRemoteHotwordDetectionService.run(
                service -> service.detectFromDspSource(sessionId, callback));

        AudioRecord record = createAudioRecord(recognitionEvent);

        Pair<ParcelFileDescriptor, ParcelFileDescriptor> clientPipe = createPipe();

        if (clientPipe == null) {
            // Error.
            // Need to propagate as unknown error or something?
            return;
        }
        ParcelFileDescriptor audioSink = clientPipe.second;
        ParcelFileDescriptor clientRead = clientPipe.first;

    static final class SoundTriggerCallback extends IRecognitionStatusCallback.Stub {
        private SoundTrigger.KeyphraseRecognitionEvent mRecognitionEvent;
        private final HotwordDetectionConnection mHotwordDetectionConnection;
        private final IHotwordRecognitionStatusCallback mExternalCallback;
        private final IDspHotwordDetectionCallback mInternalCallback;
        record.startRecording();

        SoundTriggerCallback(IHotwordRecognitionStatusCallback callback,
                HotwordDetectionConnection connection) {
            mHotwordDetectionConnection = connection;
            mExternalCallback = callback;
            mInternalCallback = new IDspHotwordDetectionCallback.Stub() {
        mAudioCopyExecutor.execute(() -> {
            try (OutputStream fos =
                         new ParcelFileDescriptor.AutoCloseOutputStream(audioSink)) {
                byte[] buffer = new byte[1024];

                while (true) {
                    int bytesRead = record.read(buffer, 0, 1024);

                    if (bytesRead < 0) {
                        break;
                    }

                    fos.write(buffer, 0, bytesRead);
                }
            } catch (IOException e) {
                Slog.w(TAG, "Failed supplying audio data to validator", e);
            }
        });

        Runnable cancellingJob = () -> {
            record.stop();
            bestEffortCloseFileDescriptor(audioSink);
            // TODO: consider calling externalCallback.onRejected(ERROR_TIMEOUT).
        };

        ScheduledFuture<?> cancelingFuture =
                mScheduledExecutorService.schedule(
                        cancellingJob, VALIDATION_TIMEOUT_MILLIS, TimeUnit.MILLISECONDS);

        // TODO: consider making this a non-anonymous class.
        IDspHotwordDetectionCallback internalCallback = new IDspHotwordDetectionCallback.Stub() {
            @Override
            public void onDetected() throws RemoteException {
                if (DEBUG) {
                    Slog.d(TAG, "onDetected");
                }
                    mExternalCallback.onKeyphraseDetected(mRecognitionEvent);
                bestEffortCloseFileDescriptor(audioSink);
                cancelingFuture.cancel(true);

                // Give 2 more seconds for the interactor to start consuming the mic. If it fails to
                // do so under the given time, we'll force-close the mic to make sure resources are
                // freed up.
                // TODO: consider modelling these 2 seconds in the API.
                mScheduledExecutorService.schedule(
                        cancellingJob,
                        VOICE_INTERACTION_TIMEOUT_TO_OPEN_MIC_MILLIS,
                        TimeUnit.MILLISECONDS);

                externalCallback.onKeyphraseDetected(recognitionEvent);
            }

            @Override
@@ -125,10 +193,30 @@ final class HotwordDetectionConnection {
                if (DEBUG) {
                    Slog.d(TAG, "onRejected");
                }
                    mExternalCallback.onRejected(
                cancelingFuture.cancel(true);
                externalCallback.onRejected(
                        AlwaysOnHotwordDetector.HOTWORD_DETECTION_FALSE_ALERT);
            }
        };

        mRemoteHotwordDetectionService.run(
                service -> service.detectFromDspSource(
                        clientRead,
                        recognitionEvent.getCaptureFormat(),
                        VALIDATION_TIMEOUT_MILLIS,
                        internalCallback));
        bestEffortCloseFileDescriptor(clientRead);
    }

    static final class SoundTriggerCallback extends IRecognitionStatusCallback.Stub {
        private SoundTrigger.KeyphraseRecognitionEvent mRecognitionEvent;
        private final HotwordDetectionConnection mHotwordDetectionConnection;
        private final IHotwordRecognitionStatusCallback mExternalCallback;

        SoundTriggerCallback(IHotwordRecognitionStatusCallback callback,
                HotwordDetectionConnection connection) {
            mHotwordDetectionConnection = connection;
            mExternalCallback = callback;
        }

        @Override
@@ -142,7 +230,7 @@ final class HotwordDetectionConnection {
            if (useHotwordDetectionService) {
                mRecognitionEvent = recognitionEvent;
                mHotwordDetectionConnection.detectFromDspSource(
                        recognitionEvent.getCaptureSession(), mInternalCallback);
                        recognitionEvent, mExternalCallback);
            } else {
                mExternalCallback.onKeyphraseDetected(recognitionEvent);
            }
@@ -171,6 +259,48 @@ final class HotwordDetectionConnection {
        }
    }

    // TODO: figure out if we need to let the client configure some of the parameters.
    /**
     * Builds the {@link AudioRecord} used to read the audio captured for a recognition event.
     *
     * <p>The record uses the {@code HOTWORD} internal capture preset, the capture format and
     * capture session reported by {@code recognitionEvent}, and a buffer sized to hold
     * {@code MAX_STREAMING_SECONDS} seconds of audio at the event's sample rate.
     *
     * @param recognitionEvent the event supplying the capture format and capture session
     * @return a new, not yet started, {@link AudioRecord}
     */
    private static AudioRecord createAudioRecord(
            @NonNull SoundTrigger.KeyphraseRecognitionEvent recognitionEvent) {
        final int sampleRate = recognitionEvent.getCaptureFormat().getSampleRate();

        final AudioAttributes hotwordAttributes = new AudioAttributes.Builder()
                .setInternalCapturePreset(MediaRecorder.AudioSource.HOTWORD)
                .build();
        final int bufferSizeInBytes = getBufferSizeInBytes(sampleRate, MAX_STREAMING_SECONDS);

        return new AudioRecord(
                hotwordAttributes,
                recognitionEvent.getCaptureFormat(),
                bufferSizeInBytes,
                recognitionEvent.getCaptureSession());
    }

    /**
     * Computes the buffer size, in bytes, needed to hold {@code bufferLengthSeconds} seconds of
     * audio sampled at {@code sampleRate} Hz, with each sample occupying
     * {@code BYTES_PER_SAMPLE} bytes (the format returned by DSP audio capture).
     *
     * @param sampleRate sample rate of the audio, in Hz
     * @param bufferLengthSeconds amount of audio the buffer must hold, in seconds
     * @return the required buffer size in bytes
     */
    private static int getBufferSizeInBytes(int sampleRate, int bufferLengthSeconds) {
        final int bytesPerSecond = BYTES_PER_SAMPLE * sampleRate;
        return bytesPerSecond * bufferLengthSeconds;
    }

    /**
     * Creates a pipe and returns its two ends as a pair, in the order returned by
     * {@link ParcelFileDescriptor#createPipe()}: element 0 as {@code first} and element 1 as
     * {@code second}.
     *
     * @return the pair of pipe ends, or {@code null} if the pipe could not be created (the
     *         failure is logged)
     */
    private static Pair<ParcelFileDescriptor, ParcelFileDescriptor> createPipe() {
        try {
            final ParcelFileDescriptor[] pipeEnds = ParcelFileDescriptor.createPipe();
            return Pair.create(pipeEnds[0], pipeEnds[1]);
        } catch (IOException e) {
            Slog.e(TAG, "Failed to create audio stream pipe", e);
            return null;
        }
    }

    /**
     * Closes {@code fd} on a best-effort basis: an {@link IOException} raised by the close is
     * never propagated, and is logged only when {@code DEBUG} is enabled.
     *
     * @param fd the descriptor to close
     */
    private static void bestEffortCloseFileDescriptor(ParcelFileDescriptor fd) {
        try {
            fd.close();
        } catch (IOException e) {
            if (!DEBUG) {
                // Nothing to recover from here; stay silent when debugging is off.
                return;
            }
            Slog.w(TAG, "Failed closing file descriptor", e);
        }
    }

    /**
     * Writes the current {@code mBound} value to {@code pw} as a single line of the form
     * {@code <prefix>mBound=<value>}.
     *
     * @param prefix text emitted at the start of the line
     * @param pw writer receiving the output
     */
    public void dump(String prefix, PrintWriter pw) {
        pw.println(prefix + "mBound=" + mBound);
    }