Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 9d2d26af authored by Jean-Michel Trivi's avatar Jean-Michel Trivi
Browse files

Add support in TTS for volume and panning control of the synth output

Add two new parameters that are used when synthesizing text and
 playing it back directly to control the volume and left-right
 panning of the output.
Panning is applied using a balance law, which is not energy-preserving
 but which doesn't lower the volume when not panning / panning to
 center (legacy behavior).

Reduced the amount of logging, and removed the spoken text from the logs.

In TextToSpeech.java: added convenience method to handle the setting
 of the cached synthesis parameters.

Change-Id: I235d3d3193283ccc1891e2065d43787e3f63304d
parent 2cdee233
Loading
Loading
Loading
Loading
+81 −30
Original line number Diff line number Diff line
@@ -148,6 +148,23 @@ public class TextToSpeech {
         * {@hide}
         */
        public static final int DEFAULT_PITCH = 100;// 1x
        /**
         * {@hide}
         */
        public static final float DEFAULT_VOLUME = 1.0f;
        /**
         * {@hide}
         */
        protected static final String DEFAULT_VOLUME_STRING = "1.0";
        /**
         * {@hide}
         */
        public static final float DEFAULT_PAN = 0.0f;
        /**
         * {@hide}
         */
        protected static final String DEFAULT_PAN_STRING = "0.0";

        /**
         * {@hide}
         */
@@ -331,6 +348,24 @@ public class TextToSpeech {
         * @see TextToSpeech#synthesizeToFile(String, HashMap, String)
         */
        public static final String KEY_PARAM_UTTERANCE_ID = "utteranceId";
        /**
         * {@hide}
         * Parameter key to specify the speech volume relative to the current stream type
         * volume used when speaking text. Volume is specified as a float ranging from 0 to 1
         * where 0 is silence, and 1 is the maximum volume.
         * @see TextToSpeech#speak(String, int, HashMap)
         * @see TextToSpeech#playEarcon(String, int, HashMap)
         */
        public static final String KEY_PARAM_VOLUME = "volume";
        /**
         * {@hide}
         * Parameter key to specify how the speech is panned from left to right when speaking text.
         * Pan is specified as a float ranging from -1 to +1 where -1 maps to a hard-left pan,
         * 0 to center, and +1 to hard-right.
         * @see TextToSpeech#speak(String, int, HashMap)
         * @see TextToSpeech#playEarcon(String, int, HashMap)
         */
        public static final String KEY_PARAM_PAN = "pan";

        // key positions in the array of cached parameters
        /**
@@ -371,7 +406,18 @@ public class TextToSpeech {
        /**
         * {@hide}
         */
        protected static final int NB_CACHED_PARAMS = 8;
        protected static final int PARAM_POSITION_VOLUME = 16;

        /**
         * {@hide}
         */
        protected static final int PARAM_POSITION_PAN = 18;


        /**
         * {@hide}
         */
        protected static final int NB_CACHED_PARAMS = 20;
    }

    /**
@@ -416,6 +462,8 @@ public class TextToSpeech {
        mCachedParams[Engine.PARAM_POSITION_UTTERANCE_ID] = Engine.KEY_PARAM_UTTERANCE_ID;
        mCachedParams[Engine.PARAM_POSITION_ENGINE] = Engine.KEY_PARAM_ENGINE;
        mCachedParams[Engine.PARAM_POSITION_PITCH] = Engine.KEY_PARAM_PITCH;
        mCachedParams[Engine.PARAM_POSITION_VOLUME] = Engine.KEY_PARAM_VOLUME;
        mCachedParams[Engine.PARAM_POSITION_PAN] = Engine.KEY_PARAM_PAN;

        // Leave all defaults that are shown in Settings uninitialized/at the default
        // so that the values set in Settings will take effect if the application does
@@ -429,6 +477,8 @@ public class TextToSpeech {
        mCachedParams[Engine.PARAM_POSITION_UTTERANCE_ID + 1] = "";
        mCachedParams[Engine.PARAM_POSITION_ENGINE + 1] = "";
        mCachedParams[Engine.PARAM_POSITION_PITCH + 1] = "100";
        mCachedParams[Engine.PARAM_POSITION_VOLUME + 1] = Engine.DEFAULT_VOLUME_STRING;
        mCachedParams[Engine.PARAM_POSITION_PAN + 1] = Engine.DEFAULT_PAN_STRING;

        initTts();
    }
@@ -717,24 +767,18 @@ public class TextToSpeech {
    {
        synchronized (mStartLock) {
            int result = ERROR;
            Log.i("TTS received: ", text);
            Log.i("TTS", "speak() queueMode=" + queueMode);
            if (!mStarted) {
                return result;
            }
            try {
                if ((params != null) && (!params.isEmpty())) {
                    String extra = params.get(Engine.KEY_PARAM_STREAM);
                    if (extra != null) {
                        mCachedParams[Engine.PARAM_POSITION_STREAM + 1] = extra;
                    }
                    extra = params.get(Engine.KEY_PARAM_UTTERANCE_ID);
                    if (extra != null) {
                        mCachedParams[Engine.PARAM_POSITION_UTTERANCE_ID + 1] = extra;
                    }
                    extra = params.get(Engine.KEY_PARAM_ENGINE);
                    if (extra != null) {
                        mCachedParams[Engine.PARAM_POSITION_ENGINE + 1] = extra;
                    }
                    setCachedParam(params, Engine.KEY_PARAM_STREAM, Engine.PARAM_POSITION_STREAM);
                    setCachedParam(params, Engine.KEY_PARAM_UTTERANCE_ID,
                            Engine.PARAM_POSITION_UTTERANCE_ID);
                    setCachedParam(params, Engine.KEY_PARAM_ENGINE, Engine.PARAM_POSITION_ENGINE);
                    setCachedParam(params, Engine.KEY_PARAM_VOLUME, Engine.PARAM_POSITION_VOLUME);
                    setCachedParam(params, Engine.KEY_PARAM_PAN, Engine.PARAM_POSITION_PAN);
                }
                result = mITts.speak(mPackageName, text, queueMode, mCachedParams);
            } catch (RemoteException e) {
@@ -791,10 +835,9 @@ public class TextToSpeech {
                    if (extra != null) {
                        mCachedParams[Engine.PARAM_POSITION_STREAM + 1] = extra;
                    }
                    extra = params.get(Engine.KEY_PARAM_UTTERANCE_ID);
                    if (extra != null) {
                        mCachedParams[Engine.PARAM_POSITION_UTTERANCE_ID + 1] = extra;
                    }
                    setCachedParam(params, Engine.KEY_PARAM_STREAM, Engine.PARAM_POSITION_STREAM);
                    setCachedParam(params, Engine.KEY_PARAM_UTTERANCE_ID,
                            Engine.PARAM_POSITION_UTTERANCE_ID);
                }
                result = mITts.playEarcon(mPackageName, earcon, queueMode, null);
            } catch (RemoteException e) {
@@ -845,10 +888,8 @@ public class TextToSpeech {
            }
            try {
                if ((params != null) && (!params.isEmpty())) {
                    String extra = params.get(Engine.KEY_PARAM_UTTERANCE_ID);
                    if (extra != null) {
                        mCachedParams[Engine.PARAM_POSITION_UTTERANCE_ID + 1] = extra;
                    }
                    setCachedParam(params, Engine.KEY_PARAM_UTTERANCE_ID,
                            Engine.PARAM_POSITION_UTTERANCE_ID);
                }
                result = mITts.playSilence(mPackageName, durationInMs, queueMode, mCachedParams);
            } catch (RemoteException e) {
@@ -870,6 +911,7 @@ public class TextToSpeech {
                mStarted = false;
                initTts();
            } finally {
                resetCachedParams();
                return result;
            }
        }
@@ -1224,6 +1266,7 @@ public class TextToSpeech {
     */
    public int synthesizeToFile(String text, HashMap<String,String> params,
            String filename) {
        Log.i("TTS", "synthesizeToFile()");
        synchronized (mStartLock) {
            int result = ERROR;
            if (!mStarted) {
@@ -1232,14 +1275,9 @@ public class TextToSpeech {
            try {
                if ((params != null) && (!params.isEmpty())) {
                    // no need to read the stream type here
                    String extra = params.get(Engine.KEY_PARAM_UTTERANCE_ID);
                    if (extra != null) {
                        mCachedParams[Engine.PARAM_POSITION_UTTERANCE_ID + 1] = extra;
                    }
                    extra = params.get(Engine.KEY_PARAM_ENGINE);
                    if (extra != null) {
                        mCachedParams[Engine.PARAM_POSITION_ENGINE + 1] = extra;
                    }
                    setCachedParam(params, Engine.KEY_PARAM_UTTERANCE_ID,
                            Engine.PARAM_POSITION_UTTERANCE_ID);
                    setCachedParam(params, Engine.KEY_PARAM_ENGINE, Engine.PARAM_POSITION_ENGINE);
                }
                result = mITts.synthesizeToFile(mPackageName, text, mCachedParams, filename) ?
                        SUCCESS : ERROR;
@@ -1277,6 +1315,19 @@ public class TextToSpeech {
        mCachedParams[Engine.PARAM_POSITION_STREAM + 1] =
                String.valueOf(Engine.DEFAULT_STREAM);
        mCachedParams[Engine.PARAM_POSITION_UTTERANCE_ID+ 1] = "";
        mCachedParams[Engine.PARAM_POSITION_VOLUME + 1] = Engine.DEFAULT_VOLUME_STRING;
        mCachedParams[Engine.PARAM_POSITION_PAN + 1] = Engine.DEFAULT_PAN_STRING;
    }

    /**
     * Copies one entry of a caller-supplied parameter map into the cached parameter array.
     *
     * <p>The value mapped to {@code key} is looked up in {@code params}. When a value is
     * present it is written to the slot immediately following {@code keyIndex} in
     * {@code mCachedParams}; when the map has no value for the key, the cached array is
     * left untouched.
     *
     * @param params the parameters to read from; it is dereferenced without a null check,
     *         so it must not be null
     * @param key the name of the parameter to look up in {@code params}
     * @param keyIndex the position of the parameter key in the cached array; the value is
     *         stored at {@code keyIndex + 1}
     */
    private void setCachedParam(HashMap<String,String> params, String key, int keyIndex) {
        final String value = params.get(key);
        if (value == null) {
            // nothing supplied for this key: keep whatever is currently cached
            return;
        }
        mCachedParams[keyIndex + 1] = value;
    }

    /**
+41 −13
Original line number Diff line number Diff line
@@ -17,7 +17,7 @@
#include <stdio.h>
#include <unistd.h>

#define LOG_TAG "SynthProxy"
#define LOG_TAG "SynthProxyJNI"

#include <utils/Log.h>
#include <nativehelper/jni.h>
@@ -33,8 +33,8 @@
#define DEFAULT_TTS_FORMAT      AudioSystem::PCM_16_BIT
#define DEFAULT_TTS_NB_CHANNELS 1
#define DEFAULT_TTS_BUFFERSIZE  2048
// TODO use the TTS stream type when available
#define DEFAULT_TTS_STREAM_TYPE AudioSystem::MUSIC
#define DEFAULT_VOLUME          1.0f

// EQ + BOOST parameters
#define FILTER_LOWSHELF_ATTENUATION -18.0f // in dB
@@ -165,6 +165,7 @@ class SynthProxyJniStorage {
        int                       mNbChannels;
        int8_t *                  mBuffer;
        size_t                    mBufferSize;
        float                     mVolume[2];

        SynthProxyJniStorage() {
            tts_ref = NULL;
@@ -179,6 +180,8 @@ class SynthProxyJniStorage {
            mBufferSize = DEFAULT_TTS_BUFFERSIZE;
            mBuffer = new int8_t[mBufferSize];
            memset(mBuffer, 0, mBufferSize);
            mVolume[AudioTrack::LEFT] = DEFAULT_VOLUME;
            mVolume[AudioTrack::RIGHT] = DEFAULT_VOLUME;
        }

        ~SynthProxyJniStorage() {
@@ -189,7 +192,7 @@ class SynthProxyJniStorage {
                mEngine = NULL;
            }
            if (mEngineLibHandle) {
                //LOGE("~SynthProxyJniStorage(): before close library");
                //LOGV("~SynthProxyJniStorage(): before close library");
                int res = dlclose(mEngineLibHandle);
                LOGE_IF( res != 0, "~SynthProxyJniStorage(): dlclose returned %d", res);
            }
@@ -241,7 +244,7 @@ class SynthProxyJniStorage {
              mAudioOut = NULL;
            } else {
              //LOGI("AudioTrack OK");
              mAudioOut->setVolume(1.0f, 1.0f);
              mAudioOut->setVolume(mVolume[AudioTrack::LEFT], mVolume[AudioTrack::RIGHT]);
              LOGV("AudioTrack ready");
            }
            mPlayLock.unlock();
@@ -618,7 +621,7 @@ android_tts_SynthProxy_setSpeechRate(JNIEnv *env, jobject thiz, jint jniData,
    Mutex::Autolock l(engineMutex);

    SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData;
    LOGI("setting speech rate to %d", speechRate);
    //LOGI("setting speech rate to %d", speechRate);
    android_tts_engine_t *engine = pSynthData->mEngine;

    if (engine) {
@@ -647,7 +650,7 @@ android_tts_SynthProxy_setPitch(JNIEnv *env, jobject thiz, jint jniData,
    sprintf(buffer, "%d", pitch);

    SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData;
    LOGI("setting pitch to %d", pitch);
    //LOGI("setting pitch to %d", pitch);
    android_tts_engine_t *engine = pSynthData->mEngine;

    if (engine) {
@@ -783,7 +786,7 @@ android_tts_SynthProxy_synthesizeToFile(JNIEnv *env, jobject thiz, jint jniData,

static int
android_tts_SynthProxy_speak(JNIEnv *env, jobject thiz, jint jniData,
        jstring textJavaString, jint javaStreamType)
        jstring textJavaString, jint javaStreamType, jfloat volume, jfloat pan)
{
    int result = ANDROID_TTS_FAILURE;

@@ -798,9 +801,34 @@ android_tts_SynthProxy_speak(JNIEnv *env, jobject thiz, jint jniData,

    SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData;

    pSynthData->mPlayLock.lock();
    {//scope for lock on mPlayLock
        Mutex::Autolock _l(pSynthData->mPlayLock);

        pSynthData->mPlayState = SYNTHPLAYSTATE_IS_PLAYING;
    pSynthData->mPlayLock.unlock();

        // clip volume and pan
        float vol = (volume > 1.0f) ? 1.0f : (volume < 0.0f) ? 0.0f : volume;
        float panning = (pan > 1.0f) ? 1.0f : (pan < -1.0f) ? -1.0f : pan;
        // compute playback volume based on volume and pan, using balance rule, in order to avoid
        // lowering volume when panning in center
        pSynthData->mVolume[AudioTrack::LEFT] = vol;
        pSynthData->mVolume[AudioTrack::RIGHT] = vol;
        if (panning > 0.0f) {
            pSynthData->mVolume[AudioTrack::LEFT] *= (1.0f - panning);
        } else if (panning < 0.0f) {
            pSynthData->mVolume[AudioTrack::RIGHT] *= (1.0f + panning);
        }

        // apply the volume if there is an output
        if (NULL != pSynthData->mAudioOut) {
            pSynthData->mAudioOut->setVolume(pSynthData->mVolume[AudioTrack::LEFT],
                    pSynthData->mVolume[AudioTrack::RIGHT]);
        }

        //LOGV("android_tts_SynthProxy_speak() vol=%.3f pan=%.3f, mVolume=[%.1f %.1f]",
        //        volume, pan,
        //        pSynthData->mVolume[AudioTrack::LEFT], pSynthData->mVolume[AudioTrack::RIGHT]);
    }

    afterSynthData_t* pForAfter = new (afterSynthData_t);
    pForAfter->jniStorage = jniData;
@@ -935,7 +963,7 @@ static JNINativeMethod gMethods[] = {
        (void*)android_tts_SynthProxy_stopSync
    },
    {   "native_speak",
        "(ILjava/lang/String;I)I",
        "(ILjava/lang/String;IFF)I",
        (void*)android_tts_SynthProxy_speak
    },
    {   "native_synthesizeToFile",
+7 −4
Original line number Diff line number Diff line
@@ -78,12 +78,13 @@ public class SynthProxy {
    /**
     * Synthesize speech and speak it directly using AudioTrack.
     */
    public int speak(String text, int streamType) {
    public int speak(String text, int streamType, float volume, float pan) {
        Log.i(TAG, "speak() on stream "+ streamType);
        if ((streamType > -1) && (streamType < AudioSystem.getNumStreamTypes())) {
            return native_speak(mJniData, text, streamType);
            return native_speak(mJniData, text, streamType, volume, pan);
        } else {
            Log.e("SynthProxy", "Trying to speak with invalid stream type " + streamType);
            return native_speak(mJniData, text, AudioManager.STREAM_MUSIC);
            return native_speak(mJniData, text, AudioManager.STREAM_MUSIC, volume, pan);
        }
    }

@@ -93,6 +94,7 @@ public class SynthProxy {
     * "/sdcard/???.wav" is recommended.
     */
    public int synthesizeToFile(String text, String filename) {
        // trace the request, including the destination file name
        Log.i(TAG, "synthesizeToFile() to file "+ filename);
        // hand the text and target file over to the native layer and report its result
        final int nativeResult = native_synthesizeToFile(mJniData, text, filename);
        return nativeResult;
    }

@@ -192,7 +194,8 @@ public class SynthProxy {

    private native final int native_stopSync(int jniData);

    private native final int native_speak(int jniData, String text, int streamType);
    private native final int native_speak(int jniData, String text, int streamType, float volume,
            float pan);

    private native final int native_synthesizeToFile(int jniData, String text, String filename);

+15 −2
Original line number Diff line number Diff line
@@ -121,7 +121,6 @@ public class TtsService extends Service implements OnCompletionListener {
    private static final int SPEECHQUEUELOCK_TIMEOUT = 5000;
    private static final int MAX_SPEECH_ITEM_CHAR_LENGTH = 4000;
    private static final int MAX_FILENAME_LENGTH = 250;
    // TODO use the TTS stream type when available
    private static final int DEFAULT_STREAM_TYPE = AudioManager.STREAM_MUSIC;
    // TODO use TextToSpeech.DEFAULT_SYNTH once it is unhidden
    private static final String DEFAULT_SYNTH = "com.svox.pico";
@@ -791,6 +790,8 @@ public class TtsService extends Service implements OnCompletionListener {
                    String speechRate = "";
                    String engine = "";
                    String pitch = "";
                    float volume = TextToSpeech.Engine.DEFAULT_VOLUME;
                    float pan = TextToSpeech.Engine.DEFAULT_PAN;
                    if (speechItem.mParams != null){
                        for (int i = 0; i < speechItem.mParams.size() - 1; i = i + 2){
                            String param = speechItem.mParams.get(i);
@@ -816,6 +817,18 @@ public class TtsService extends Service implements OnCompletionListener {
                                    engine = speechItem.mParams.get(i + 1);
                                } else if (param.equals(TextToSpeech.Engine.KEY_PARAM_PITCH)) {
                                    pitch = speechItem.mParams.get(i + 1);
                                } else if (param.equals(TextToSpeech.Engine.KEY_PARAM_VOLUME)) {
                                    try {
                                        volume = Float.parseFloat(speechItem.mParams.get(i + 1));
                                    } catch (NumberFormatException e) {
                                        volume = TextToSpeech.Engine.DEFAULT_VOLUME;
                                    }
                                } else if (param.equals(TextToSpeech.Engine.KEY_PARAM_PAN)) {
                                    try {
                                        pan = Float.parseFloat(speechItem.mParams.get(i + 1));
                                    } catch (NumberFormatException e) {
                                        pan = TextToSpeech.Engine.DEFAULT_PAN;
                                    }
                                }
                            }
                        }
@@ -844,7 +857,7 @@ public class TtsService extends Service implements OnCompletionListener {
                            setPitch("", getDefaultPitch());
                        }
                        try {
                            sNativeSynth.speak(speechItem.mText, streamType);
                            sNativeSynth.speak(speechItem.mText, streamType, volume, pan);
                        } catch (NullPointerException e) {
                            // synth will become null during onDestroy()
                            Log.v(SERVICE_TAG, " null synth, can't speak");