Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit a191afb7 authored by satok, committed by Android (Google) Code Review
Browse files

Merge "Implement multi words suggestions step1"

parents 3b0f2bf1 1f6b52e7
Loading
Loading
Loading
Loading
+6 −5
Original line number Diff line number Diff line
@@ -827,11 +827,6 @@ int Correction::RankingAlgorithm::calcFreqForSplitTwoWords(
    const bool capitalizedWordDemotion =
            firstCapitalizedWordDemotion ^ secondCapitalizedWordDemotion;

    if (DEBUG_DICT_FULL) {
        AKLOGI("Two words: %c, %c, %d",
                word[0], word[firstWordLength + 1], capitalizedWordDemotion);
    }

    if (firstWordLength == 0 || secondWordLength == 0) {
        return 0;
    }
@@ -891,6 +886,12 @@ int Correction::RankingAlgorithm::calcFreqForSplitTwoWords(
        multiplyRate(TWO_WORDS_CAPITALIZED_DEMOTION_RATE, &totalFreq);
    }

    if (DEBUG_CORRECTION_FREQ) {
        AKLOGI("Two words (%d, %d) (%d, %d) %d, %d", firstFreq, secondFreq, firstWordLength,
                secondWordLength, capitalizedWordDemotion, totalFreq);
        DUMP_WORD(word, firstWordLength);
    }

    return totalFreq;
}

+3 −3
Original line number Diff line number Diff line
@@ -216,15 +216,15 @@ static void prof_out(void) {
#define SUB_QUEUE_MAX_WORDS 1
#define SUB_QUEUE_MAX_COUNT 10
#define SUB_QUEUE_MIN_WORD_LENGTH 4
#define SUB_QUEUE_MAX_WORD_INDEX 2
#define MULTIPLE_WORDS_SUGGESTION_MAX_WORDS 2

#define TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD 0.39
#define START_TWO_WORDS_CORRECTION_THRESHOLD 0.22

#define MAX_DEPTH_MULTIPLIER 3

#define FIRST_WORD_INDEX 1
#define SECOND_WORD_INDEX 2
#define FIRST_WORD_INDEX 0
#define SECOND_WORD_INDEX 1

// TODO: Reduce this constant if possible; check the maximum number of umlauts in the same German
// word in the dictionary
+101 −76
Original line number Diff line number Diff line
@@ -224,15 +224,10 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
    // Multiple word suggestions
    if (SUGGEST_MULTIPLE_WORDS
            && inputLength >= MIN_USER_TYPED_LENGTH_FOR_MULTIPLE_WORD_SUGGESTION) {
        for (int i = 1; i < inputLength; ++i) {
            if (DEBUG_DICT) {
                AKLOGI("--- Suggest multiple words %d", i);
            }
        getSplitTwoWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
                    useFullEditDistance, inputLength, i, correction, queuePool,
                useFullEditDistance, inputLength, correction, queuePool,
                hasAutoCorrectionCandidate);
    }
    }
    PROF_END(5);

    PROF_START(6);
@@ -329,7 +324,7 @@ inline void UnigramDictionary::onTerminal(const int freq,
    int wordLength;
    unsigned short* wordPointer;

    if ((currentWordIndex == 1) && addToMasterQueue) {
    if ((currentWordIndex == FIRST_WORD_INDEX) && addToMasterQueue) {
        WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
        const int finalFreq = correction->getFinalFreq(freq, &wordPointer, &wordLength);
        if (finalFreq != NOT_A_FREQUENCY) {
@@ -377,11 +372,8 @@ bool UnigramDictionary::getSubStringSuggestion(
        const int inputWordStartPos, const int inputWordLength,
        const int outputWordStartPos, const bool isSpaceProximity, int *freqArray,
        int*wordLengthArray, unsigned short* outputWord, int *outputWordLength) {
    if (DEBUG_DICT) {
        assert(currentWordIndex >= 1);
    }
    unsigned short* tempOutputWord = 0;
    int tempOutputWordLength = 0;
    int nextWordLength = 0;
    // TODO: Optimize init suggestion
    initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
            inputLength, correction);
@@ -389,7 +381,7 @@ bool UnigramDictionary::getSubStringSuggestion(
    int freq = getMostFrequentWordLike(
            inputWordStartPos, inputWordLength, proximityInfo, mWord);
    if (freq > 0) {
        tempOutputWordLength = inputWordLength;
        nextWordLength = inputWordLength;
        tempOutputWord = mWord;
    } else if (!hasAutoCorrectionCandidate) {
        if (inputWordStartPos > 0) {
@@ -400,7 +392,7 @@ bool UnigramDictionary::getSubStringSuggestion(
            getSuggestionCandidates(useFullEditDistance, inputWordLength, correction,
                    queuePool, false, MAX_ERRORS_FOR_TWO_WORDS, currentWordIndex);
            if (DEBUG_DICT) {
                if (currentWordIndex <= SUB_QUEUE_MAX_WORD_INDEX) {
                if (currentWordIndex < MULTIPLE_WORDS_SUGGESTION_MAX_WORDS) {
                    AKLOGI("Dump word candidates(%d) %d", currentWordIndex, inputWordLength);
                    for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) {
                        queuePool->getSubQueue(currentWordIndex, i)->dumpTopWord();
@@ -415,111 +407,144 @@ bool UnigramDictionary::getSubStringSuggestion(
        int score = 0;
        const double ns = queue->getHighestNormalizedScore(
                proximityInfo->getPrimaryInputWord(), inputWordLength,
                &tempOutputWord, &score, &tempOutputWordLength);
                &tempOutputWord, &score, &nextWordLength);
        if (DEBUG_DICT) {
            AKLOGI("NS(%d) = %f, Score = %d", currentWordIndex, ns, score);
        }
        // Two words correction won't be done if the score of the first word doesn't exceed the
        // threshold.
        if (ns < TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD
                || tempOutputWordLength < SUB_QUEUE_MIN_WORD_LENGTH) {
                || nextWordLength < SUB_QUEUE_MIN_WORD_LENGTH) {
            return false;
        }
        freq = score >> (tempOutputWordLength
                + TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER);
        freq = score >> (nextWordLength + TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER);
    }
    if (DEBUG_DICT) {
        AKLOGI("Freq(%d): %d, length: %d, input length: %d, input start: %d"
                , currentWordIndex, freq, tempOutputWordLength, inputWordLength, inputWordStartPos);
        AKLOGI("Freq(%d): %d, length: %d, input length: %d, input start: %d (%d)"
                , currentWordIndex, freq, nextWordLength, inputWordLength, inputWordStartPos,
                wordLengthArray[0]);
    }
    if (freq <= 0 || tempOutputWordLength <= 0
            || MAX_WORD_LENGTH <= (outputWordStartPos + tempOutputWordLength)) {
    if (freq <= 0 || nextWordLength <= 0
            || MAX_WORD_LENGTH <= (outputWordStartPos + nextWordLength)) {
        return false;
    }
    for (int i = 0; i < tempOutputWordLength; ++i) {
    for (int i = 0; i < nextWordLength; ++i) {
        outputWord[outputWordStartPos + i] = tempOutputWord[i];
    }

    // Put output values
    freqArray[currentWordIndex - 1] = freq;
    freqArray[currentWordIndex] = freq;
    // TODO: put output length instead of input length
    wordLengthArray[currentWordIndex - 1] = inputWordLength;
    *outputWordLength = outputWordStartPos + tempOutputWordLength;
    wordLengthArray[currentWordIndex] = inputWordLength;
    const int tempOutputWordLength = outputWordStartPos + nextWordLength;
    if (outputWordLength) {
        *outputWordLength = tempOutputWordLength;
    }

    if ((inputWordStartPos + inputWordLength) < inputLength) {
        if (outputWordStartPos + tempOutputWordLength >= MAX_WORD_LENGTH) {
        if (outputWordStartPos + nextWordLength >= MAX_WORD_LENGTH) {
            return false;
        }
        outputWord[outputWordStartPos + tempOutputWordLength] = SPACE;
        if (outputWordLength) {
            ++*outputWordLength;
    } else if (currentWordIndex >= 2) {
        }
    } else if (currentWordIndex >= 1) {
        // TODO: Handle 3 or more words
        const int pairFreq = correction->getFreqForSplitTwoWords(
                freqArray, wordLengthArray, isSpaceProximity, outputWord);
        if (DEBUG_DICT) {
            AKLOGI("Split two words: %d, %d, %d, %d", freqArray[0], freqArray[1], pairFreq,
                    inputLength);
            AKLOGI("Split two words: %d, %d, %d, %d, (%d)", freqArray[0], freqArray[1], pairFreq,
                    inputLength, wordLengthArray[0]);
        }
        addWord(outputWord, *outputWordLength, pairFreq, queuePool->getMasterQueue());
        addWord(outputWord, tempOutputWordLength, pairFreq, queuePool->getMasterQueue());
    }
    return true;
}

void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo,
void UnigramDictionary::getMultiWordsSuggestionRec(ProximityInfo *proximityInfo,
        const int *xcoordinates, const int *ycoordinates, const int *codes,
        const bool useFullEditDistance, const int inputLength, const int wordDivideIndex,
        const bool useFullEditDistance, const int inputLength,
        Correction *correction, WordsPriorityQueuePool* queuePool,
        const bool hasAutoCorrectionCandidate) {
    if (inputLength >= MAX_WORD_LENGTH) return;
    if (DEBUG_DICT) {
        // MAX_PROXIMITY_CHARS_SIZE in ProximityInfo.java should be 16
        assert(MAX_PROXIMITY_CHARS == 16);
        const bool hasAutoCorrectionCandidate, const int startInputPos, const int startWordIndex,
        const int outputWordLength, int *freqArray, int* wordLengthArray,
        unsigned short* outputWord) {
    if (startWordIndex >= (MULTIPLE_WORDS_SUGGESTION_MAX_WORDS - 1)) {
        // Return if the last word index
        return;
    }

    // Allocating fixed length array on stack
    unsigned short outputWord[MAX_WORD_LENGTH];
    int freqArray[SUB_QUEUE_MAX_WORD_INDEX];
    int wordLengthArray[SUB_QUEUE_MAX_WORD_INDEX];
    int outputWordLength = 0;

    for (int i = 1; i < inputLength; ++i) {
        int tempOutputWordLength = 0;
        // First word
        int inputWordStartPos = 0;
    int inputWordLength = wordDivideIndex;
        int inputWordLength = i;
        if (DEBUG_CORRECTION_FREQ) {
            AKLOGI("Two words, %d", inputWordLength);
        }
        if (!getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
                useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate,
                FIRST_WORD_INDEX, inputWordStartPos, inputWordLength, 0, true /* not used */,
            freqArray, wordLengthArray, outputWord, &outputWordLength)) {
        return;
                freqArray, wordLengthArray, outputWord, &tempOutputWordLength)) {
            continue;
        }

    const int tempOutputWordLength = outputWordLength;
        // Second word
        // Missing space
    inputWordStartPos = wordDivideIndex;
    inputWordLength = inputLength - wordDivideIndex;
        inputWordStartPos = i;
        inputWordLength = inputLength - i;
        getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
                useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate,
                SECOND_WORD_INDEX, inputWordStartPos, inputWordLength, tempOutputWordLength,
            false /* missing space */, freqArray, wordLengthArray, outputWord, &outputWordLength);
                false /* missing space */, freqArray, wordLengthArray, outputWord,
                0);

        // Mistyped space
        ++inputWordStartPos;
        --inputWordLength;

        if (inputWordLength <= 0) {
        return;
            continue;
        }

        const int x = xcoordinates[inputWordStartPos - 1];
        const int y = ycoordinates[inputWordStartPos - 1];
        if (!proximityInfo->hasSpaceProximity(x, y)) {
        return;
            continue;
        }

        getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
                useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate,
                SECOND_WORD_INDEX, inputWordStartPos, inputWordLength, tempOutputWordLength,
            true /* mistyped space */, freqArray, wordLengthArray, outputWord, &outputWordLength);
                true /* mistyped space */, freqArray, wordLengthArray, outputWord,
                0);
    }
}

/**
 * Entry point for multiple-word suggestions.
 *
 * Does nothing when inputLength is not strictly smaller than MAX_WORD_LENGTH. Otherwise it
 * allocates the scratch buffers shared by the whole search (the output word and the per-word
 * frequency / length arrays) on the stack and starts getMultiWordsSuggestionRec() at input
 * position 0, word index 0 and output length 0.
 */
void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo,
        const int *xcoordinates, const int *ycoordinates, const int *codes,
        const bool useFullEditDistance, const int inputLength,
        Correction *correction, WordsPriorityQueuePool* queuePool,
        const bool hasAutoCorrectionCandidate) {
    if (inputLength >= MAX_WORD_LENGTH) return;
    if (DEBUG_DICT) {
        // MAX_PROXIMITY_CHARS_SIZE in ProximityInfo.java should be 16
        assert(MAX_PROXIMITY_CHARS == 16);
        AKLOGI("--- Suggest multiple words");
    }

    // Allocating fixed length array on stack.
    // outputWord is sized by the MAX_WORD_LENGTH member, so it cannot take an initializer.
    unsigned short outputWord[MAX_WORD_LENGTH];
    // Zero-fill the per-word arrays: getSubStringSuggestion() logs wordLengthArray[0] under
    // DEBUG_DICT before the first word's entry has been stored, which would otherwise read
    // indeterminate stack contents.
    int freqArray[MULTIPLE_WORDS_SUGGESTION_MAX_WORDS] = { 0 };
    int wordLengthArray[MULTIPLE_WORDS_SUGGESTION_MAX_WORDS] = { 0 };
    const int outputWordLength = 0;
    const int startInputPos = 0;
    const int startWordIndex = 0;
    getMultiWordsSuggestionRec(proximityInfo, xcoordinates, ycoordinates, codes,
            useFullEditDistance, inputLength, correction, queuePool, hasAutoCorrectionCandidate,
            startInputPos, startWordIndex, outputWordLength, freqArray, wordLengthArray,
            outputWord);
}

// Wrapper for getMostFrequentWordLikeInner, which matches it to the previous
+8 −1
Original line number Diff line number Diff line
@@ -103,7 +103,7 @@ class UnigramDictionary {
            const int currentWordIndex);
    void getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo,
            const int *xcoordinates, const int *ycoordinates, const int *codes,
            const bool useFullEditDistance, const int inputLength, const int wordDivideIndex,
            const bool useFullEditDistance, const int inputLength,
            Correction *correction, WordsPriorityQueuePool* queuePool,
            const bool hasAutoCorrectionCandidate);
    void onTerminal(const int freq, const TerminalAttributes& terminalAttributes,
@@ -127,6 +127,13 @@ class UnigramDictionary {
            const int inputWordStartPos, const int inputWordLength,
            const int outputWordStartPos, const bool isSpaceProximity, int *freqArray,
            int *wordLengthArray, unsigned short* outputWord, int *outputWordLength);
    /**
     * Worker behind getSplitTwoWordsSuggestions() for multiple-word suggestions.
     *
     * startInputPos, startWordIndex and outputWordLength describe where this step starts in
     * the input, which word slot it fills, and how much of outputWord is already in use.
     * freqArray, wordLengthArray and outputWord are caller-owned scratch buffers that the
     * implementation passes on to getSubStringSuggestion().
     * NOTE(review): the parameter name now matches the definition (startInputPos); it was
     * previously declared as startPos.
     */
    void getMultiWordsSuggestionRec(ProximityInfo *proximityInfo,
            const int *xcoordinates, const int *ycoordinates, const int *codes,
            const bool useFullEditDistance, const int inputLength,
            Correction *correction, WordsPriorityQueuePool* queuePool,
            const bool hasAutoCorrectionCandidate, const int startInputPos,
            const int startWordIndex, const int outputWordLength, int *freqArray,
            int* wordLengthArray, unsigned short* outputWord);

    const uint8_t* const DICT_ROOT;
    const int MAX_WORD_LENGTH;
+14 −25
Original line number Diff line number Diff line
@@ -27,11 +27,10 @@ class WordsPriorityQueuePool {
 public:
    WordsPriorityQueuePool(int mainQueueMaxWords, int subQueueMaxWords, int maxWordLength) {
        mMasterQueue = new(mMasterQueueBuf) WordsPriorityQueue(mainQueueMaxWords, maxWordLength);
        for (int i = 0, subQueueBufOffset = 0; i < SUB_QUEUE_MAX_COUNT;
        for (int i = 0, subQueueBufOffset = 0;
                i < MULTIPLE_WORDS_SUGGESTION_MAX_WORDS * SUB_QUEUE_MAX_COUNT;
                ++i, subQueueBufOffset += sizeof(WordsPriorityQueue)) {
            mSubQueues1[i] = new(mSubQueueBuf1 + subQueueBufOffset)
                    WordsPriorityQueue(subQueueMaxWords, maxWordLength);
            mSubQueues2[i] = new(mSubQueueBuf2 + subQueueBufOffset)
            mSubQueues[i] = new(mSubQueueBuf + subQueueBufOffset)
                    WordsPriorityQueue(subQueueMaxWords, maxWordLength);
        }
    }
@@ -44,7 +43,7 @@ class WordsPriorityQueuePool {
    }

    WordsPriorityQueue* getSubQueue(const int wordIndex, const int inputWordLength) {
        if (wordIndex > SUB_QUEUE_MAX_WORD_INDEX) {
        if (wordIndex >= MULTIPLE_WORDS_SUGGESTION_MAX_WORDS) {
            return 0;
        }
        if (inputWordLength < 0 || inputWordLength >= SUB_QUEUE_MAX_COUNT) {
@@ -53,30 +52,21 @@ class WordsPriorityQueuePool {
            }
            return 0;
        }
        // TODO: Come up with more generic pool
        if (wordIndex == 1) {
            return mSubQueues1[inputWordLength];
        } else if (wordIndex == 2) {
            return mSubQueues2[inputWordLength];
        } else {
            return 0;
        }
        return mSubQueues[wordIndex * SUB_QUEUE_MAX_COUNT + inputWordLength];
    }

    inline void clearAll() {
        mMasterQueue->clear();
        for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) {
            mSubQueues1[i]->clear();
            mSubQueues2[i]->clear();
        for (int i = 0; i < MULTIPLE_WORDS_SUGGESTION_MAX_WORDS; ++i) {
            clearSubQueue(i);
        }
    }

    inline void clearSubQueue(const int wordIndex) {
        for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) {
            if (wordIndex == 1) {
                mSubQueues1[i]->clear();
            } else if (wordIndex == 2) {
                mSubQueues2[i]->clear();
            WordsPriorityQueue* queue = getSubQueue(wordIndex, i);
            if (queue) {
                queue->clear();
            }
        }
    }
@@ -84,17 +74,16 @@ class WordsPriorityQueuePool {
    void dumpSubQueue1TopSuggestions() {
        AKLOGI("DUMP SUBQUEUE1 TOP SUGGESTIONS");
        for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) {
            mSubQueues1[i]->dumpTopWord();
            getSubQueue(0, i)->dumpTopWord();
        }
    }

 private:
    WordsPriorityQueue* mMasterQueue;
    WordsPriorityQueue* mSubQueues1[SUB_QUEUE_MAX_COUNT];
    WordsPriorityQueue* mSubQueues2[SUB_QUEUE_MAX_COUNT];
    WordsPriorityQueue* mSubQueues[SUB_QUEUE_MAX_COUNT * MULTIPLE_WORDS_SUGGESTION_MAX_WORDS];
    char mMasterQueueBuf[sizeof(WordsPriorityQueue)];
    char mSubQueueBuf1[SUB_QUEUE_MAX_COUNT * sizeof(WordsPriorityQueue)];
    char mSubQueueBuf2[SUB_QUEUE_MAX_COUNT * sizeof(WordsPriorityQueue)];
    char mSubQueueBuf[MULTIPLE_WORDS_SUGGESTION_MAX_WORDS
                      * SUB_QUEUE_MAX_COUNT * sizeof(WordsPriorityQueue)];
};
}