Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 1f6b52e7 authored by satok's avatar satok
Browse files

Implement multi words suggestions step1

Change-Id: I96e8e1b0d9ccc0ed13d53c40300d8c19bcb7af5b
parent 38a3ad3e
Loading
Loading
Loading
Loading
+6 −5
Original line number Diff line number Diff line
@@ -827,11 +827,6 @@ int Correction::RankingAlgorithm::calcFreqForSplitTwoWords(
    const bool capitalizedWordDemotion =
            firstCapitalizedWordDemotion ^ secondCapitalizedWordDemotion;

    if (DEBUG_DICT_FULL) {
        AKLOGI("Two words: %c, %c, %d",
                word[0], word[firstWordLength + 1], capitalizedWordDemotion);
    }

    if (firstWordLength == 0 || secondWordLength == 0) {
        return 0;
    }
@@ -891,6 +886,12 @@ int Correction::RankingAlgorithm::calcFreqForSplitTwoWords(
        multiplyRate(TWO_WORDS_CAPITALIZED_DEMOTION_RATE, &totalFreq);
    }

    if (DEBUG_CORRECTION_FREQ) {
        AKLOGI("Two words (%d, %d) (%d, %d) %d, %d", firstFreq, secondFreq, firstWordLength,
                secondWordLength, capitalizedWordDemotion, totalFreq);
        DUMP_WORD(word, firstWordLength);
    }

    return totalFreq;
}

+3 −3
Original line number Diff line number Diff line
@@ -216,15 +216,15 @@ static void prof_out(void) {
#define SUB_QUEUE_MAX_WORDS 1
#define SUB_QUEUE_MAX_COUNT 10
#define SUB_QUEUE_MIN_WORD_LENGTH 4
#define SUB_QUEUE_MAX_WORD_INDEX 2
#define MULTIPLE_WORDS_SUGGESTION_MAX_WORDS 2

#define TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD 0.39
#define START_TWO_WORDS_CORRECTION_THRESHOLD 0.22

#define MAX_DEPTH_MULTIPLIER 3

#define FIRST_WORD_INDEX 1
#define SECOND_WORD_INDEX 2
#define FIRST_WORD_INDEX 0
#define SECOND_WORD_INDEX 1

// TODO: Reduce this constant if possible; check the maximum number of umlauts in the same German
// word in the dictionary
+101 −76
Original line number Diff line number Diff line
@@ -224,15 +224,10 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
    // Multiple word suggestions
    if (SUGGEST_MULTIPLE_WORDS
            && inputLength >= MIN_USER_TYPED_LENGTH_FOR_MULTIPLE_WORD_SUGGESTION) {
        for (int i = 1; i < inputLength; ++i) {
            if (DEBUG_DICT) {
                AKLOGI("--- Suggest multiple words %d", i);
            }
        getSplitTwoWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
                    useFullEditDistance, inputLength, i, correction, queuePool,
                useFullEditDistance, inputLength, correction, queuePool,
                hasAutoCorrectionCandidate);
    }
    }
    PROF_END(5);

    PROF_START(6);
@@ -329,7 +324,7 @@ inline void UnigramDictionary::onTerminal(const int freq,
    int wordLength;
    unsigned short* wordPointer;

    if ((currentWordIndex == 1) && addToMasterQueue) {
    if ((currentWordIndex == FIRST_WORD_INDEX) && addToMasterQueue) {
        WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
        const int finalFreq = correction->getFinalFreq(freq, &wordPointer, &wordLength);
        if (finalFreq != NOT_A_FREQUENCY) {
@@ -377,11 +372,8 @@ bool UnigramDictionary::getSubStringSuggestion(
        const int inputWordStartPos, const int inputWordLength,
        const int outputWordStartPos, const bool isSpaceProximity, int *freqArray,
        int*wordLengthArray, unsigned short* outputWord, int *outputWordLength) {
    if (DEBUG_DICT) {
        assert(currentWordIndex >= 1);
    }
    unsigned short* tempOutputWord = 0;
    int tempOutputWordLength = 0;
    int nextWordLength = 0;
    // TODO: Optimize init suggestion
    initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
            inputLength, correction);
@@ -389,7 +381,7 @@ bool UnigramDictionary::getSubStringSuggestion(
    int freq = getMostFrequentWordLike(
            inputWordStartPos, inputWordLength, proximityInfo, mWord);
    if (freq > 0) {
        tempOutputWordLength = inputWordLength;
        nextWordLength = inputWordLength;
        tempOutputWord = mWord;
    } else if (!hasAutoCorrectionCandidate) {
        if (inputWordStartPos > 0) {
@@ -400,7 +392,7 @@ bool UnigramDictionary::getSubStringSuggestion(
            getSuggestionCandidates(useFullEditDistance, inputWordLength, correction,
                    queuePool, false, MAX_ERRORS_FOR_TWO_WORDS, currentWordIndex);
            if (DEBUG_DICT) {
                if (currentWordIndex <= SUB_QUEUE_MAX_WORD_INDEX) {
                if (currentWordIndex < MULTIPLE_WORDS_SUGGESTION_MAX_WORDS) {
                    AKLOGI("Dump word candidates(%d) %d", currentWordIndex, inputWordLength);
                    for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) {
                        queuePool->getSubQueue(currentWordIndex, i)->dumpTopWord();
@@ -415,111 +407,144 @@ bool UnigramDictionary::getSubStringSuggestion(
        int score = 0;
        const double ns = queue->getHighestNormalizedScore(
                proximityInfo->getPrimaryInputWord(), inputWordLength,
                &tempOutputWord, &score, &tempOutputWordLength);
                &tempOutputWord, &score, &nextWordLength);
        if (DEBUG_DICT) {
            AKLOGI("NS(%d) = %f, Score = %d", currentWordIndex, ns, score);
        }
        // Two words correction won't be done if the score of the first word doesn't exceed the
        // threshold.
        if (ns < TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD
                || tempOutputWordLength < SUB_QUEUE_MIN_WORD_LENGTH) {
                || nextWordLength < SUB_QUEUE_MIN_WORD_LENGTH) {
            return false;
        }
        freq = score >> (tempOutputWordLength
                + TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER);
        freq = score >> (nextWordLength + TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER);
    }
    if (DEBUG_DICT) {
        AKLOGI("Freq(%d): %d, length: %d, input length: %d, input start: %d"
                , currentWordIndex, freq, tempOutputWordLength, inputWordLength, inputWordStartPos);
        AKLOGI("Freq(%d): %d, length: %d, input length: %d, input start: %d (%d)"
                , currentWordIndex, freq, nextWordLength, inputWordLength, inputWordStartPos,
                wordLengthArray[0]);
    }
    if (freq <= 0 || tempOutputWordLength <= 0
            || MAX_WORD_LENGTH <= (outputWordStartPos + tempOutputWordLength)) {
    if (freq <= 0 || nextWordLength <= 0
            || MAX_WORD_LENGTH <= (outputWordStartPos + nextWordLength)) {
        return false;
    }
    for (int i = 0; i < tempOutputWordLength; ++i) {
    for (int i = 0; i < nextWordLength; ++i) {
        outputWord[outputWordStartPos + i] = tempOutputWord[i];
    }

    // Put output values
    freqArray[currentWordIndex - 1] = freq;
    freqArray[currentWordIndex] = freq;
    // TODO: put output length instead of input length
    wordLengthArray[currentWordIndex - 1] = inputWordLength;
    *outputWordLength = outputWordStartPos + tempOutputWordLength;
    wordLengthArray[currentWordIndex] = inputWordLength;
    const int tempOutputWordLength = outputWordStartPos + nextWordLength;
    if (outputWordLength) {
        *outputWordLength = tempOutputWordLength;
    }

    if ((inputWordStartPos + inputWordLength) < inputLength) {
        if (outputWordStartPos + tempOutputWordLength >= MAX_WORD_LENGTH) {
        if (outputWordStartPos + nextWordLength >= MAX_WORD_LENGTH) {
            return false;
        }
        outputWord[outputWordStartPos + tempOutputWordLength] = SPACE;
        if (outputWordLength) {
            ++*outputWordLength;
    } else if (currentWordIndex >= 2) {
        }
    } else if (currentWordIndex >= 1) {
        // TODO: Handle 3 or more words
        const int pairFreq = correction->getFreqForSplitTwoWords(
                freqArray, wordLengthArray, isSpaceProximity, outputWord);
        if (DEBUG_DICT) {
            AKLOGI("Split two words: %d, %d, %d, %d", freqArray[0], freqArray[1], pairFreq,
                    inputLength);
            AKLOGI("Split two words: %d, %d, %d, %d, (%d)", freqArray[0], freqArray[1], pairFreq,
                    inputLength, wordLengthArray[0]);
        }
        addWord(outputWord, *outputWordLength, pairFreq, queuePool->getMasterQueue());
        addWord(outputWord, tempOutputWordLength, pairFreq, queuePool->getMasterQueue());
    }
    return true;
}

void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo,
void UnigramDictionary::getMultiWordsSuggestionRec(ProximityInfo *proximityInfo,
        const int *xcoordinates, const int *ycoordinates, const int *codes,
        const bool useFullEditDistance, const int inputLength, const int wordDivideIndex,
        const bool useFullEditDistance, const int inputLength,
        Correction *correction, WordsPriorityQueuePool* queuePool,
        const bool hasAutoCorrectionCandidate) {
    if (inputLength >= MAX_WORD_LENGTH) return;
    if (DEBUG_DICT) {
        // MAX_PROXIMITY_CHARS_SIZE in ProximityInfo.java should be 16
        assert(MAX_PROXIMITY_CHARS == 16);
        const bool hasAutoCorrectionCandidate, const int startInputPos, const int startWordIndex,
        const int outputWordLength, int *freqArray, int* wordLengthArray,
        unsigned short* outputWord) {
    if (startWordIndex >= (MULTIPLE_WORDS_SUGGESTION_MAX_WORDS - 1)) {
        // Return if the last word index
        return;
    }

    // Allocating fixed length array on stack
    unsigned short outputWord[MAX_WORD_LENGTH];
    int freqArray[SUB_QUEUE_MAX_WORD_INDEX];
    int wordLengthArray[SUB_QUEUE_MAX_WORD_INDEX];
    int outputWordLength = 0;

    for (int i = 1; i < inputLength; ++i) {
        int tempOutputWordLength = 0;
        // First word
        int inputWordStartPos = 0;
    int inputWordLength = wordDivideIndex;
        int inputWordLength = i;
        if (DEBUG_CORRECTION_FREQ) {
            AKLOGI("Two words, %d", inputWordLength);
        }
        if (!getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
                useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate,
                FIRST_WORD_INDEX, inputWordStartPos, inputWordLength, 0, true /* not used */,
            freqArray, wordLengthArray, outputWord, &outputWordLength)) {
        return;
                freqArray, wordLengthArray, outputWord, &tempOutputWordLength)) {
            continue;
        }

    const int tempOutputWordLength = outputWordLength;
        // Second word
        // Missing space
    inputWordStartPos = wordDivideIndex;
    inputWordLength = inputLength - wordDivideIndex;
        inputWordStartPos = i;
        inputWordLength = inputLength - i;
        getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
                useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate,
                SECOND_WORD_INDEX, inputWordStartPos, inputWordLength, tempOutputWordLength,
            false /* missing space */, freqArray, wordLengthArray, outputWord, &outputWordLength);
                false /* missing space */, freqArray, wordLengthArray, outputWord,
                0);

        // Mistyped space
        ++inputWordStartPos;
        --inputWordLength;

        if (inputWordLength <= 0) {
        return;
            continue;
        }

        const int x = xcoordinates[inputWordStartPos - 1];
        const int y = ycoordinates[inputWordStartPos - 1];
        if (!proximityInfo->hasSpaceProximity(x, y)) {
        return;
            continue;
        }

        getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
                useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate,
                SECOND_WORD_INDEX, inputWordStartPos, inputWordLength, tempOutputWordLength,
            true /* mistyped space */, freqArray, wordLengthArray, outputWord, &outputWordLength);
                true /* mistyped space */, freqArray, wordLengthArray, outputWord,
                0);
    }
}

// Entry point for multiple-word (split) suggestions.
//
// Sets up the scratch buffers shared by the whole search and delegates to
// getMultiWordsSuggestionRec(), starting at input position 0 and word index 0
// with an empty output word. All other arguments are forwarded unchanged.
//
// Does nothing when inputLength >= MAX_WORD_LENGTH.
void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo,
        const int *xcoordinates, const int *ycoordinates, const int *codes,
        const bool useFullEditDistance, const int inputLength,
        Correction *correction, WordsPriorityQueuePool* queuePool,
        const bool hasAutoCorrectionCandidate) {
    if (inputLength >= MAX_WORD_LENGTH) return;
    if (DEBUG_DICT) {
        // MAX_PROXIMITY_CHARS_SIZE in ProximityInfo.java should be 16
        assert(MAX_PROXIMITY_CHARS == 16);
        AKLOGI("--- Suggest multiple words");
    }

    // Allocating fixed length array on stack
    unsigned short outputWord[MAX_WORD_LENGTH];
    // Zero-initialized: getSubStringSuggestion()'s DEBUG_DICT log prints
    // wordLengthArray[0] before the entry for the first word has been written,
    // so these must not be left with indeterminate values.
    int freqArray[MULTIPLE_WORDS_SUGGESTION_MAX_WORDS] = {0};
    int wordLengthArray[MULTIPLE_WORDS_SUGGESTION_MAX_WORDS] = {0};
    const int outputWordLength = 0;
    const int startInputPos = 0;
    const int startWordIndex = 0;
    getMultiWordsSuggestionRec(proximityInfo, xcoordinates, ycoordinates, codes,
            useFullEditDistance, inputLength, correction, queuePool, hasAutoCorrectionCandidate,
            startInputPos, startWordIndex, outputWordLength, freqArray, wordLengthArray,
            outputWord);
}

// Wrapper for getMostFrequentWordLikeInner, which matches it to the previous
+8 −1
Original line number Diff line number Diff line
@@ -103,7 +103,7 @@ class UnigramDictionary {
            const int currentWordIndex);
    void getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo,
            const int *xcoordinates, const int *ycoordinates, const int *codes,
            const bool useFullEditDistance, const int inputLength, const int wordDivideIndex,
            const bool useFullEditDistance, const int inputLength,
            Correction *correction, WordsPriorityQueuePool* queuePool,
            const bool hasAutoCorrectionCandidate);
    void onTerminal(const int freq, const TerminalAttributes& terminalAttributes,
@@ -127,6 +127,13 @@ class UnigramDictionary {
            const int inputWordStartPos, const int inputWordLength,
            const int outputWordStartPos, const bool isSpaceProximity, int *freqArray,
            int *wordLengthArray, unsigned short* outputWord, int *outputWordLength);
    // Helper behind getSplitTwoWordsSuggestions(), which calls it with
    // startInputPos = 0, startWordIndex = 0 and outputWordLength = 0.
    //
    // startInputPos    - position in the input at which this step starts
    // startWordIndex   - index of the word this step starts from; the definition
    //                    returns immediately once it reaches
    //                    MULTIPLE_WORDS_SUGGESTION_MAX_WORDS - 1
    // outputWordLength - length of the output already written to outputWord
    // freqArray, wordLengthArray, outputWord - caller-owned scratch buffers that
    //                    are passed on to getSubStringSuggestion()
    // NOTE(review): startInputPos and outputWordLength look like placeholders for
    // the 3-or-more-words case in this "step1" change - confirm against the
    // definition.
    void getMultiWordsSuggestionRec(ProximityInfo *proximityInfo,
            const int *xcoordinates, const int *ycoordinates, const int *codes,
            const bool useFullEditDistance, const int inputLength,
            Correction *correction, WordsPriorityQueuePool* queuePool,
            const bool hasAutoCorrectionCandidate, const int startInputPos,
            const int startWordIndex, const int outputWordLength, int *freqArray,
            int* wordLengthArray, unsigned short* outputWord);

    const uint8_t* const DICT_ROOT;
    const int MAX_WORD_LENGTH;
+14 −25
Original line number Diff line number Diff line
@@ -27,11 +27,10 @@ class WordsPriorityQueuePool {
 public:
    WordsPriorityQueuePool(int mainQueueMaxWords, int subQueueMaxWords, int maxWordLength) {
        mMasterQueue = new(mMasterQueueBuf) WordsPriorityQueue(mainQueueMaxWords, maxWordLength);
        for (int i = 0, subQueueBufOffset = 0; i < SUB_QUEUE_MAX_COUNT;
        for (int i = 0, subQueueBufOffset = 0;
                i < MULTIPLE_WORDS_SUGGESTION_MAX_WORDS * SUB_QUEUE_MAX_COUNT;
                ++i, subQueueBufOffset += sizeof(WordsPriorityQueue)) {
            mSubQueues1[i] = new(mSubQueueBuf1 + subQueueBufOffset)
                    WordsPriorityQueue(subQueueMaxWords, maxWordLength);
            mSubQueues2[i] = new(mSubQueueBuf2 + subQueueBufOffset)
            mSubQueues[i] = new(mSubQueueBuf + subQueueBufOffset)
                    WordsPriorityQueue(subQueueMaxWords, maxWordLength);
        }
    }
@@ -44,7 +43,7 @@ class WordsPriorityQueuePool {
    }

    WordsPriorityQueue* getSubQueue(const int wordIndex, const int inputWordLength) {
        if (wordIndex > SUB_QUEUE_MAX_WORD_INDEX) {
        if (wordIndex >= MULTIPLE_WORDS_SUGGESTION_MAX_WORDS) {
            return 0;
        }
        if (inputWordLength < 0 || inputWordLength >= SUB_QUEUE_MAX_COUNT) {
@@ -53,30 +52,21 @@ class WordsPriorityQueuePool {
            }
            return 0;
        }
        // TODO: Come up with more generic pool
        if (wordIndex == 1) {
            return mSubQueues1[inputWordLength];
        } else if (wordIndex == 2) {
            return mSubQueues2[inputWordLength];
        } else {
            return 0;
        }
        return mSubQueues[wordIndex * SUB_QUEUE_MAX_COUNT + inputWordLength];
    }

    inline void clearAll() {
        mMasterQueue->clear();
        for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) {
            mSubQueues1[i]->clear();
            mSubQueues2[i]->clear();
        for (int i = 0; i < MULTIPLE_WORDS_SUGGESTION_MAX_WORDS; ++i) {
            clearSubQueue(i);
        }
    }

    inline void clearSubQueue(const int wordIndex) {
        for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) {
            if (wordIndex == 1) {
                mSubQueues1[i]->clear();
            } else if (wordIndex == 2) {
                mSubQueues2[i]->clear();
            WordsPriorityQueue* queue = getSubQueue(wordIndex, i);
            if (queue) {
                queue->clear();
            }
        }
    }
@@ -84,17 +74,16 @@ class WordsPriorityQueuePool {
    void dumpSubQueue1TopSuggestions() {
        AKLOGI("DUMP SUBQUEUE1 TOP SUGGESTIONS");
        for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) {
            mSubQueues1[i]->dumpTopWord();
            getSubQueue(0, i)->dumpTopWord();
        }
    }

 private:
    WordsPriorityQueue* mMasterQueue;
    WordsPriorityQueue* mSubQueues1[SUB_QUEUE_MAX_COUNT];
    WordsPriorityQueue* mSubQueues2[SUB_QUEUE_MAX_COUNT];
    WordsPriorityQueue* mSubQueues[SUB_QUEUE_MAX_COUNT * MULTIPLE_WORDS_SUGGESTION_MAX_WORDS];
    char mMasterQueueBuf[sizeof(WordsPriorityQueue)];
    char mSubQueueBuf1[SUB_QUEUE_MAX_COUNT * sizeof(WordsPriorityQueue)];
    char mSubQueueBuf2[SUB_QUEUE_MAX_COUNT * sizeof(WordsPriorityQueue)];
    char mSubQueueBuf[MULTIPLE_WORDS_SUGGESTION_MAX_WORDS
                      * SUB_QUEUE_MAX_COUNT * sizeof(WordsPriorityQueue)];
};
}