Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit a17cef01 authored by Charles Chen's avatar Charles Chen
Browse files

Updating TtsEngine.h and SynthProxy.cpp so that buffer memory

management is handled on the system side.
parent 7c4faf56
Loading
Loading
Loading
Loading
+22 −17
Original line number Original line Diff line number Diff line
@@ -25,24 +25,29 @@


namespace android {
namespace android {


// Status reported by the TTS engine to the synthesis callback: tells the
// consumer whether the engine has finished or has more data to synthesize.
enum tts_synth_status {
    // Synthesis is done; no further data follows.
    TTS_SYNTH_DONE = 0,
    // More data remains to be synthesized.
    TTS_SYNTH_PENDING = 1
};

// Status returned by the synthesis callback to the TTS engine: tells the
// engine whether the consumer of the synthesized data wants it to go on.
enum tts_callback_status {
    // The consumer asks the engine to stop.
    TTS_CALLBACK_HALT = 0,
    // The consumer asks the engine to continue.
    TTS_CALLBACK_CONTINUE = 1
};

// The callback is used by the implementation of this interface to notify its
// The callback is used by the implementation of this interface to notify its
// client, the Android TTS service, that the last requested synthesis has been
// client, the Android TTS service, that the last requested synthesis has been
// completed.
// completed. // TODO reword
// The callback for synthesis completed takes:
// The callback for synthesis completed takes:
//    void *       - The userdata pointer set in the original synth call
//    [inout] void *&      - The userdata pointer set in the original synth call
//    uint32_t     - Track sampling rate in Hz
//    [in]    uint32_t     - Track sampling rate in Hz
//    audio_format - The AudioSystem::audio_format enum
//    [in]    audio_format - The AudioSystem::audio_format enum
//    int          - The number of channels
//    [in]    int          - The number of channels
//    int8_t *     - A buffer of audio data only valid during the execution of the callback
//    [inout] int8_t *&     - A buffer of audio data only valid during the execution of the callback
//    size_t       - The size of the buffer
//    [inout] size_t  &     - The size of the buffer
// Note about memory management:
//    [in]    tts_synth_status  - Status of the synthesis; 0 for done, 1 for more data to be synthesized.
//    The implementation of TtsEngine is responsible for the management of the memory
// Returns the status of the consumer of the synthesis. 0 for stop, 1 for continue.
//    it allocates to store the synthesized speech. After the execution of the callback
typedef tts_callback_status (synthDoneCB_t)(void *&, uint32_t, AudioSystem::audio_format, int, int8_t *&, size_t&, tts_synth_status);
//    to hand the synthesized data to the client of TtsEngine, the TTS engine is
//    free to reuse or free the previously allocated memory.
//    This implies that the implementation of the "synthDoneCB" callback cannot use
//    the pointer to the buffer of audio samples outside of the callback itself.
typedef void (synthDoneCB_t)(void *, uint32_t, AudioSystem::audio_format, int, int8_t *, size_t);


class TtsEngine;
class TtsEngine;
extern "C" TtsEngine* getTtsEngine();
extern "C" TtsEngine* getTtsEngine();
@@ -155,13 +160,13 @@ public:
    // @param text      the UTF-8 text to synthesize
    // @param text      the UTF-8 text to synthesize
    // @param userdata  pointer to be returned when the call is invoked
    // @param userdata  pointer to be returned when the call is invoked
    // @return          TTS_SUCCESS or TTS_FAILURE
    // @return          TTS_SUCCESS or TTS_FAILURE
    virtual tts_result synthesizeText(const char *text, void *userdata);
    virtual tts_result synthesizeText(const char *text, int8_t *buffer, size_t bufferSize, void *userdata);


    // Synthesize IPA text. When synthesis completes, the engine must call the given callback to notify the TTS API.
    // Synthesize IPA text. When synthesis completes, the engine must call the given callback to notify the TTS API.
    // @param ipa      the IPA data to synthesize
    // @param ipa      the IPA data to synthesize
    // @param userdata  pointer to be returned when the call is invoked
    // @param userdata  pointer to be returned when the call is invoked
    // @return TTS_FEATURE_UNSUPPORTED if IPA is not supported, otherwise TTS_SUCCESS or TTS_FAILURE
    // @return TTS_FEATURE_UNSUPPORTED if IPA is not supported, otherwise TTS_SUCCESS or TTS_FAILURE
    virtual tts_result synthesizeIpa(const char *ipa, void *userdata);
    virtual tts_result synthesizeIpa(const char *ipa, int8_t *buffer, size_t bufferSize, void *userdata);
};
};


} // namespace android
} // namespace android
+30 −9
Original line number Original line Diff line number Diff line
@@ -32,6 +32,7 @@
#define DEFAULT_TTS_RATE        16000
#define DEFAULT_TTS_RATE        16000
#define DEFAULT_TTS_FORMAT      AudioSystem::PCM_16_BIT
#define DEFAULT_TTS_FORMAT      AudioSystem::PCM_16_BIT
#define DEFAULT_TTS_NB_CHANNELS 1
#define DEFAULT_TTS_NB_CHANNELS 1
#define DEFAULT_TTS_BUFFERSIZE  1024


#define USAGEMODE_PLAY_IMMEDIATELY 0
#define USAGEMODE_PLAY_IMMEDIATELY 0
#define USAGEMODE_WRITE_TO_FILE    1
#define USAGEMODE_WRITE_TO_FILE    1
@@ -64,6 +65,8 @@ class SynthProxyJniStorage {
        uint32_t                  mSampleRate;
        uint32_t                  mSampleRate;
        AudioSystem::audio_format mAudFormat;
        AudioSystem::audio_format mAudFormat;
        int                       mNbChannels;
        int                       mNbChannels;
        int8_t *                  mBuffer;
        size_t                    mBufferSize;


        SynthProxyJniStorage() {
        SynthProxyJniStorage() {
            //tts_class = NULL;
            //tts_class = NULL;
@@ -73,6 +76,8 @@ class SynthProxyJniStorage {
            mSampleRate = DEFAULT_TTS_RATE;
            mSampleRate = DEFAULT_TTS_RATE;
            mAudFormat  = DEFAULT_TTS_FORMAT;
            mAudFormat  = DEFAULT_TTS_FORMAT;
            mNbChannels = DEFAULT_TTS_NB_CHANNELS;
            mNbChannels = DEFAULT_TTS_NB_CHANNELS;
            mBufferSize = DEFAULT_TTS_BUFFERSIZE;
            mBuffer = new int8_t[mBufferSize];
        }
        }


        ~SynthProxyJniStorage() {
        ~SynthProxyJniStorage() {
@@ -81,6 +86,7 @@ class SynthProxyJniStorage {
                mNativeSynthInterface->shutdown();
                mNativeSynthInterface->shutdown();
                mNativeSynthInterface = NULL;
                mNativeSynthInterface = NULL;
            }
            }
            // mBuffer is allocated with new[] in the constructor, so it must be
            // released with delete[]; scalar delete here is undefined behavior.
            delete[] mBuffer;
        }
        }


        void killAudio() {
        void killAudio() {
@@ -159,23 +165,27 @@ void prepAudioTrack(SynthProxyJniStorage* pJniData,
 * Callback from TTS engine.
 * Callback from TTS engine.
 * Directly speaks using AudioTrack or write to file
 * Directly speaks using AudioTrack or write to file
 */
 */
static void ttsSynthDoneCB(void * userdata, uint32_t rate,
static tts_callback_status ttsSynthDoneCB(void *& userdata, uint32_t rate,
                           AudioSystem::audio_format format, int channel,
                           AudioSystem::audio_format format, int channel,
                           int8_t *wav, size_t bufferSize) {
                           int8_t *&wav, size_t &bufferSize, tts_synth_status status) {
    LOGI("ttsSynthDoneCallback: %d bytes", bufferSize);
    LOGI("ttsSynthDoneCallback: %d bytes", bufferSize);


    if (userdata == NULL){
        LOGE("userdata == NULL");
        return TTS_CALLBACK_HALT;
    }
    afterSynthData_t* pForAfter = (afterSynthData_t*)userdata;
    afterSynthData_t* pForAfter = (afterSynthData_t*)userdata;
    SynthProxyJniStorage* pJniData = (SynthProxyJniStorage*)(pForAfter->jniStorage);


    if (pForAfter->usageMode == USAGEMODE_PLAY_IMMEDIATELY){
    if (pForAfter->usageMode == USAGEMODE_PLAY_IMMEDIATELY){
        LOGI("Direct speech");
        LOGI("Direct speech");


        if (wav == NULL) {
        if (wav == NULL) {
            delete pForAfter;
            LOGI("Null: speech has completed");
            LOGI("Null: speech has completed");
        }
        }


        if (bufferSize > 0) {
        if (bufferSize > 0) {
            SynthProxyJniStorage* pJniData =
                    (SynthProxyJniStorage*)(pForAfter->jniStorage);
            prepAudioTrack(pJniData, rate, format, channel);
            prepAudioTrack(pJniData, rate, format, channel);
            if (pJniData->mAudioOut) {
            if (pJniData->mAudioOut) {
                pJniData->mAudioOut->write(wav, bufferSize);
                pJniData->mAudioOut->write(wav, bufferSize);
@@ -187,6 +197,7 @@ static void ttsSynthDoneCB(void * userdata, uint32_t rate,
    } else  if (pForAfter->usageMode == USAGEMODE_WRITE_TO_FILE) {
    } else  if (pForAfter->usageMode == USAGEMODE_WRITE_TO_FILE) {
        LOGI("Save to file");
        LOGI("Save to file");
        if (wav == NULL) {
        if (wav == NULL) {
            delete pForAfter;
            LOGI("Null: speech has completed");
            LOGI("Null: speech has completed");
        }
        }
        if (bufferSize > 0){
        if (bufferSize > 0){
@@ -195,10 +206,17 @@ static void ttsSynthDoneCB(void * userdata, uint32_t rate,
    }
    }
    // TODO update to call back into the SynthProxy class through the
    // TODO update to call back into the SynthProxy class through the
    //      javaTTSFields.synthProxyMethodPost method to notify
    //      javaTTSFields.synthProxyMethodPost method to notify
    //      playback has completed
    //      playback has completed if the synthesis is done, i.e.
    //      if status == TTS_SYNTH_DONE
    //delete pForAfter;


    delete pForAfter;
    // we don't update the wav (output) parameter as we'll let the next callback
    return;
    // write at the same location, we've consumed the data already, but we need
    // to update bufferSize to let the TTS engine know how much it can write the
    // next time it calls this function.
    bufferSize = pJniData->mBufferSize;

    return TTS_CALLBACK_CONTINUE;
}
}




@@ -223,7 +241,9 @@ android_tts_SynthProxy_native_setup(JNIEnv *env, jobject thiz,
    } else {
    } else {
        TtsEngine *(*get_TtsEngine)() =
        TtsEngine *(*get_TtsEngine)() =
            reinterpret_cast<TtsEngine* (*)()>(dlsym(engine_lib_handle, "getTtsEngine"));
            reinterpret_cast<TtsEngine* (*)()>(dlsym(engine_lib_handle, "getTtsEngine"));

        pJniStorage->mNativeSynthInterface = (*get_TtsEngine)();
        pJniStorage->mNativeSynthInterface = (*get_TtsEngine)();

        if (pJniStorage->mNativeSynthInterface) {
        if (pJniStorage->mNativeSynthInterface) {
            pJniStorage->mNativeSynthInterface->init(ttsSynthDoneCB);
            pJniStorage->mNativeSynthInterface->init(ttsSynthDoneCB);
        }
        }
@@ -323,7 +343,7 @@ android_tts_SynthProxy_synthesizeToFile(JNIEnv *env, jobject thiz, jint jniData,


    // TODO check return codes
    // TODO check return codes
    if (pSynthData->mNativeSynthInterface) {
    if (pSynthData->mNativeSynthInterface) {
        pSynthData->mNativeSynthInterface->synthesizeText(textNativeString,
        pSynthData->mNativeSynthInterface->synthesizeText(textNativeString, pSynthData->mBuffer, pSynthData->mBufferSize,
                (void *)pForAfter);
                (void *)pForAfter);
    }
    }


@@ -395,7 +415,7 @@ android_tts_SynthProxy_speak(JNIEnv *env, jobject thiz, jint jniData,


    if (pSynthData->mNativeSynthInterface) {
    if (pSynthData->mNativeSynthInterface) {
        const char *textNativeString = env->GetStringUTFChars(textJavaString, 0);
        const char *textNativeString = env->GetStringUTFChars(textJavaString, 0);
        pSynthData->mNativeSynthInterface->synthesizeText(textNativeString,
        pSynthData->mNativeSynthInterface->synthesizeText(textNativeString, pSynthData->mBuffer, pSynthData->mBufferSize,
                (void *)pForAfter);
                (void *)pForAfter);
        env->ReleaseStringUTFChars(textJavaString, textNativeString);
        env->ReleaseStringUTFChars(textJavaString, textNativeString);
    }
    }
@@ -442,6 +462,7 @@ static void
android_tts_SynthProxy_playAudioBuffer(JNIEnv *env, jobject thiz, jint jniData,
android_tts_SynthProxy_playAudioBuffer(JNIEnv *env, jobject thiz, jint jniData,
        int bufferPointer, int bufferSize)
        int bufferPointer, int bufferSize)
{
{
LOGI("android_tts_SynthProxy_playAudioBuffer");
    if (jniData == 0) {
    if (jniData == 0) {
        LOGE("android_tts_SynthProxy_playAudioBuffer(): invalid JNI data");
        LOGE("android_tts_SynthProxy_playAudioBuffer(): invalid JNI data");
        return;
        return;