Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 23f486f7 authored by satok, committed by Android (Google) Code Review
Browse files

Merge "Merge missing space and mistyped space correction algorithm"

parents c882d619 9955716d
Loading
Loading
Loading
Loading
+10 −19
Original line number Diff line number Diff line
@@ -158,10 +158,10 @@ void Correction::checkState() {
    }
}

int Correction::getFreqForSplitTwoWords(const int firstFreq, const int secondFreq,
        const unsigned short *word) {
    return Correction::RankingAlgorithm::calcFreqForSplitTwoWords(
            firstFreq, secondFreq, this, word);
int Correction::getFreqForSplitTwoWords(const int *freqArray, const int *wordLengthArray,
        const bool isSpaceProximity, const unsigned short *word) {
    return Correction::RankingAlgorithm::calcFreqForSplitTwoWords(freqArray, wordLengthArray, this,
            isSpaceProximity, word);
}

int Correction::getFinalFreq(const int freq, unsigned short **word, int *wordLength) {
@@ -806,21 +806,12 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const

/* static */
int Correction::RankingAlgorithm::calcFreqForSplitTwoWords(
        const int firstFreq, const int secondFreq, const Correction* correction,
        const unsigned short *word) {
    const int spaceProximityPos = correction->mSpaceProximityPos;
    const int missingSpacePos = correction->mMissingSpacePos;
    if (DEBUG_DICT) {
        int inputCount = 0;
        if (spaceProximityPos >= 0) ++inputCount;
        if (missingSpacePos >= 0) ++inputCount;
        assert(inputCount <= 1);
    }
    const bool isSpaceProximity = spaceProximityPos >= 0;
    const int inputLength = correction->mInputLength;
    const int firstWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos;
    const int secondWordLength = isSpaceProximity ? (inputLength - spaceProximityPos - 1)
            : (inputLength - missingSpacePos);
        const int *freqArray, const int *wordLengthArray, const Correction* correction,
        const bool isSpaceProximity, const unsigned short *word) {
    const int firstFreq = freqArray[0];
    const int secondFreq = freqArray[1];
    const int firstWordLength = wordLengthArray[0];
    const int secondWordLength = wordLengthArray[1];
    const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;

    bool firstCapitalizedWordDemotion = false;
+5 −3
Original line number Diff line number Diff line
@@ -122,7 +122,8 @@ class Correction {
    bool needsToPrune() const;

    int getFreqForSplitTwoWords(
            const int firstFreq, const int secondFreq, const unsigned short *word);
            const int *freqArray, const int *wordLengthArray, const bool isSpaceProximity,
            const unsigned short *word);
    int getFinalFreq(const int freq, unsigned short **word, int* wordLength);
    int getFinalFreqForSubQueue(const int freq, unsigned short **word, int* wordLength,
            const int inputLength);
@@ -150,8 +151,9 @@ class Correction {
        static int calculateFinalFreq(const int inputIndex, const int depth,
                const int freq, int *editDistanceTable, const Correction* correction,
                const int inputLength);
        static int calcFreqForSplitTwoWords(const int firstFreq, const int secondFreq,
                const Correction* correction, const unsigned short *word);
        static int calcFreqForSplitTwoWords(const int *freqArray, const int *wordLengthArray,
                const Correction* correction, const bool isSpaceProximity,
                const unsigned short *word);
        static double calcNormalizedScore(const unsigned short* before, const int beforeLength,
                const unsigned short* after, const int afterLength, const int score);
        static int editDistance(const unsigned short* before,
+2 −3
Original line number Diff line number Diff line
@@ -180,10 +180,9 @@ static void prof_out(void) {
#define CALIBRATE_SCORE_BY_TOUCH_COORDINATES true

#define SUGGEST_WORDS_WITH_MISSING_CHARACTER true
#define SUGGEST_WORDS_WITH_MISSING_SPACE_CHARACTER true
#define SUGGEST_WORDS_WITH_EXCESSIVE_CHARACTER true
#define SUGGEST_WORDS_WITH_TRANSPOSED_CHARACTERS true
#define SUGGEST_WORDS_WITH_SPACE_PROXIMITY true
#define SUGGEST_MULTIPLE_WORDS true

// The following "rate"s are used as a multiplier before dividing by 100, so they are in percent.
#define WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE 80
@@ -233,7 +232,7 @@ static void prof_out(void) {

// Minimum suggest depth for one word for all cases except for missing space suggestions.
#define MIN_SUGGEST_DEPTH 1
#define MIN_USER_TYPED_LENGTH_FOR_MISSING_SPACE_SUGGESTION 3
#define MIN_USER_TYPED_LENGTH_FOR_MULTIPLE_WORD_SUGGESTION 3
#define MIN_USER_TYPED_LENGTH_FOR_EXCESSIVE_CHARACTER_SUGGESTION 3

#define min(a,b) ((a)<(b)?(a):(b))
+71 −87
Original line number Diff line number Diff line
@@ -211,7 +211,6 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
    PROF_END(3);

    PROF_START(4);
    // Note: This line is intentionally left blank
    bool hasAutoCorrectionCandidate = false;
    WordsPriorityQueue* masterQueue = queuePool->getMasterQueue();
    if (masterQueue->size() > 0) {
@@ -222,14 +221,14 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
    PROF_END(4);

    PROF_START(5);
    // Suggestions with missing space
    if (SUGGEST_WORDS_WITH_MISSING_SPACE_CHARACTER
            && inputLength >= MIN_USER_TYPED_LENGTH_FOR_MISSING_SPACE_SUGGESTION) {
    // Multiple word suggestions
    if (SUGGEST_MULTIPLE_WORDS
            && inputLength >= MIN_USER_TYPED_LENGTH_FOR_MULTIPLE_WORD_SUGGESTION) {
        for (int i = 1; i < inputLength; ++i) {
            if (DEBUG_DICT) {
                AKLOGI("--- Suggest missing space characters %d", i);
                AKLOGI("--- Suggest multiple words %d", i);
            }
            getMissingSpaceWords(proximityInfo, xcoordinates, ycoordinates, codes,
            getSplitTwoWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
                    useFullEditDistance, inputLength, i, correction, queuePool,
                    hasAutoCorrectionCandidate);
        }
@@ -237,26 +236,9 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
    PROF_END(5);

    PROF_START(6);
    if (SUGGEST_WORDS_WITH_SPACE_PROXIMITY && proximityInfo) {
        // The first and last "mistyped spaces" are taken care of by excessive character handling
        for (int i = 1; i < inputLength - 1; ++i) {
            if (DEBUG_DICT) {
                AKLOGI("--- Suggest words with proximity space %d", i);
            }
            const int x = xcoordinates[i];
            const int y = ycoordinates[i];
            if (DEBUG_PROXIMITY_INFO) {
                AKLOGI("Input[%d] x = %d, y = %d, has space proximity = %d",
                        i, x, y, proximityInfo->hasSpaceProximity(x, y));
            }
            if (proximityInfo->hasSpaceProximity(x, y)) {
                getMistypedSpaceWords(proximityInfo, xcoordinates, ycoordinates, codes,
                        useFullEditDistance, inputLength, i, correction, queuePool,
                        hasAutoCorrectionCandidate);
            }
        }
    }
    // Note: This line is intentionally left blank
    PROF_END(6);

    if (DEBUG_DICT) {
        queuePool->dumpSubQueue1TopSuggestions();
        for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) {
@@ -337,24 +319,6 @@ void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance,
    }
}

// Convenience wrapper around getSplitTwoWordsSuggestions() for the "missing space" case.
// Every argument is forwarded unchanged; missingSpacePos is passed as the split position
// and -1 is passed for spaceProximityPos, i.e. no space-proximity position is supplied.
// Produces no return value; any output happens inside getSplitTwoWordsSuggestions().
void UnigramDictionary::getMissingSpaceWords(ProximityInfo *proximityInfo, const int *xcoordinates,
        const int *ycoordinates, const int *codes, const bool useFullEditDistance,
        const int inputLength, const int missingSpacePos, Correction *correction,
        WordsPriorityQueuePool* queuePool, const bool hasAutoCorrectionCandidate) {
    // Argument order here is (missingSpacePos, spaceProximityPos).
    getSplitTwoWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
            useFullEditDistance, inputLength, missingSpacePos, -1/* spaceProximityPos */,
            correction, queuePool, hasAutoCorrectionCandidate);
}

// Convenience wrapper around getSplitTwoWordsSuggestions() for the "mistyped space" case.
// Every argument is forwarded unchanged; -1 is passed for missingSpacePos and
// spaceProximityPos is passed as the position of the key treated as a mistyped space.
// Produces no return value; any output happens inside getSplitTwoWordsSuggestions().
void UnigramDictionary::getMistypedSpaceWords(ProximityInfo *proximityInfo, const int *xcoordinates,
        const int *ycoordinates, const int *codes, const bool useFullEditDistance,
        const int inputLength, const int spaceProximityPos, Correction *correction,
        WordsPriorityQueuePool* queuePool, const bool hasAutoCorrectionCandidate) {
    // Argument order here is (missingSpacePos, spaceProximityPos).
    getSplitTwoWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
            useFullEditDistance, inputLength, -1 /* missingSpacePos */, spaceProximityPos,
            correction, queuePool, hasAutoCorrectionCandidate);
}

inline void UnigramDictionary::onTerminal(const int freq,
        const TerminalAttributes& terminalAttributes, Correction *correction,
        WordsPriorityQueuePool *queuePool, const bool addToMasterQueue,
@@ -405,15 +369,23 @@ inline void UnigramDictionary::onTerminal(const int freq,
    }
}

int UnigramDictionary::getSubStringSuggestion(
bool UnigramDictionary::getSubStringSuggestion(
        ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates,
        const int *codes, const bool useFullEditDistance, Correction *correction,
        WordsPriorityQueuePool* queuePool, const int inputLength,
        const bool hasAutoCorrectionCandidate, const int currentWordIndex,
        const int inputWordStartPos, const int inputWordLength,
        const int outputWordStartPos, unsigned short* outputWord, int *outputWordLength) {
        const int outputWordStartPos, const bool isSpaceProximity, int *freqArray,
        int*wordLengthArray, unsigned short* outputWord, int *outputWordLength) {
    if (DEBUG_DICT) {
        assert(currentWordIndex >= 1);
    }
    unsigned short* tempOutputWord = 0;
    int tempOutputWordLength = 0;
    // TODO: Optimize init suggestion
    initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
            inputLength, correction);

    int freq = getMostFrequentWordLike(
            inputWordStartPos, inputWordLength, proximityInfo, mWord);
    if (freq > 0) {
@@ -438,7 +410,7 @@ int UnigramDictionary::getSubStringSuggestion(
        }
        WordsPriorityQueue* queue = queuePool->getSubQueue(currentWordIndex, inputWordLength);
        if (!queue || queue->size() < 1) {
            return 0;
            return false;
        }
        int score = 0;
        const double ns = queue->getHighestNormalizedScore(
@@ -451,93 +423,105 @@ int UnigramDictionary::getSubStringSuggestion(
        // threshold.
        if (ns < TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD
                || tempOutputWordLength < SUB_QUEUE_MIN_WORD_LENGTH) {
            return 0;
            return false;
        }
        freq = score >> (tempOutputWordLength
                + TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER);
    }
    if (DEBUG_DICT) {
        AKLOGI("Freq(%d): %d", currentWordIndex, freq);
        AKLOGI("Freq(%d): %d, length: %d, input length: %d, input start: %d"
                , currentWordIndex, freq, tempOutputWordLength, inputWordLength, inputWordStartPos);
    }
    if (freq <= 0 || tempOutputWordLength <= 0
            || MAX_WORD_LENGTH <= (outputWordStartPos + tempOutputWordLength)) {
        return 0;
        return false;
    }
    for (int i = 0; i < tempOutputWordLength; ++i) {
        outputWord[outputWordStartPos + i] = tempOutputWord[i];
    }

    // Put output values
    freqArray[currentWordIndex - 1] = freq;
    // TODO: put output length instead of input length
    wordLengthArray[currentWordIndex - 1] = inputWordLength;
    *outputWordLength = outputWordStartPos + tempOutputWordLength;

    if ((inputWordStartPos + inputWordLength) < inputLength) {
        if (outputWordStartPos + tempOutputWordLength >= MAX_WORD_LENGTH) {
            return 0;
            return false;
        }
        outputWord[outputWordStartPos + tempOutputWordLength] = SPACE;
        ++tempOutputWordLength;
        ++*outputWordLength;
    } else if (currentWordIndex >= 2) {
        // TODO: Handle 3 or more words
        const int pairFreq = correction->getFreqForSplitTwoWords(
                freqArray, wordLengthArray, isSpaceProximity, outputWord);
        if (DEBUG_DICT) {
            AKLOGI("Split two words: %d, %d, %d, %d", freqArray[0], freqArray[1], pairFreq,
                    inputLength);
        }
    *outputWordLength = outputWordStartPos + tempOutputWordLength;
    return freq;
        addWord(outputWord, *outputWordLength, pairFreq, queuePool->getMasterQueue());
    }
    return true;
}

void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo,
        const int *xcoordinates, const int *ycoordinates, const int *codes,
        const bool useFullEditDistance, const int inputLength, const int missingSpacePos,
        const int  spaceProximityPos, Correction *correction, WordsPriorityQueuePool* queuePool,
        const bool useFullEditDistance, const int inputLength, const int wordDivideIndex,
        Correction *correction, WordsPriorityQueuePool* queuePool,
        const bool hasAutoCorrectionCandidate) {
    if (inputLength >= MAX_WORD_LENGTH) return;
    if (DEBUG_DICT) {
        int inputCount = 0;
        if (spaceProximityPos >= 0) ++inputCount;
        if (missingSpacePos >= 0) ++inputCount;
        assert(inputCount <= 1);
        // MAX_PROXIMITY_CHARS_SIZE in ProximityInfo.java should be 16
        assert(MAX_PROXIMITY_CHARS == 16);
    }

    initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
            inputLength, correction);

    // Allocating fixed length array on stack
    unsigned short outputWord[MAX_WORD_LENGTH];
    int freqArray[SUB_QUEUE_MAX_WORD_INDEX];
    int wordLengthArray[SUB_QUEUE_MAX_WORD_INDEX];
    int outputWordLength = 0;

    WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
    const bool isSpaceProximity = spaceProximityPos >= 0;

    // First word
    int inputWordStartPos = 0;
    int inputWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos;
    const int firstFreq = getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
    int inputWordLength = wordDivideIndex;
    if (!getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
            useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate,
            FIRST_WORD_INDEX, inputWordStartPos, inputWordLength, 0, outputWord, &outputWordLength);
    if (firstFreq <= 0) {
            FIRST_WORD_INDEX, inputWordStartPos, inputWordLength, 0, true /* not used */,
            freqArray, wordLengthArray, outputWord, &outputWordLength)) {
        return;
    }

    const int tempOutputWordLength = outputWordLength;
    // Second word
    inputWordStartPos = isSpaceProximity ? (spaceProximityPos + 1) : missingSpacePos;
    inputWordLength = isSpaceProximity ? (inputLength - spaceProximityPos - 1)
            : (inputLength - missingSpacePos);
    const int secondFreq = getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
    // Missing space
    inputWordStartPos = wordDivideIndex;
    inputWordLength = inputLength - wordDivideIndex;
    getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
            useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate,
            SECOND_WORD_INDEX, inputWordStartPos, inputWordLength, outputWordLength, outputWord,
            &outputWordLength);
    if (secondFreq <= 0) {
        return;
    }
            SECOND_WORD_INDEX, inputWordStartPos, inputWordLength, tempOutputWordLength,
            false /* missing space */, freqArray, wordLengthArray, outputWord, &outputWordLength);

    // TODO: Remove initSuggestions and correction->setCorrectionParams
    initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, correction);
    // Mistyped space
    ++inputWordStartPos;
    --inputWordLength;

    correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */,
            -1 /* transposedPos */, spaceProximityPos, missingSpacePos,
            useFullEditDistance, false /* doAutoCompletion */, MAX_ERRORS_FOR_TWO_WORDS);
    const int pairFreq = correction->getFreqForSplitTwoWords(firstFreq, secondFreq, outputWord);
    if (DEBUG_DICT) {
        AKLOGI("Split two words:  %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength);
    if (inputWordLength <= 0) {
        return;
    }
    addWord(outputWord, outputWordLength, pairFreq, masterQueue);

    const int x = xcoordinates[inputWordStartPos - 1];
    const int y = ycoordinates[inputWordStartPos - 1];
    if (!proximityInfo->hasSpaceProximity(x, y)) {
        return;
    }

    getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
            useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate,
            SECOND_WORD_INDEX, inputWordStartPos, inputWordLength, tempOutputWordLength,
            true /* mistyped space */, freqArray, wordLengthArray, outputWord, &outputWordLength);
}

// Wrapper for getMostFrequentWordLikeInner, which matches it to the previous
// interface.
inline int UnigramDictionary::getMostFrequentWordLike(const int startInputIndex,
+5 −12
Original line number Diff line number Diff line
@@ -103,17 +103,9 @@ class UnigramDictionary {
            const int currentWordIndex);
    void getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo,
            const int *xcoordinates, const int *ycoordinates, const int *codes,
            const bool useFullEditDistance, const int inputLength, const int spaceProximityPos,
            const int missingSpacePos, Correction *correction, WordsPriorityQueuePool* queuePool,
            const bool useFullEditDistance, const int inputLength, const int wordDivideIndex,
            Correction *correction, WordsPriorityQueuePool* queuePool,
            const bool hasAutoCorrectionCandidate);
    void getMissingSpaceWords(ProximityInfo *proximityInfo, const int *xcoordinates,
            const int *ycoordinates, const int *codes, const bool useFullEditDistance,
            const int inputLength, const int missingSpacePos, Correction *correction,
            WordsPriorityQueuePool* queuePool, const bool hasAutoCorrectionCandidate);
    void getMistypedSpaceWords(ProximityInfo *proximityInfo, const int *xcoordinates,
            const int *ycoordinates, const int *codes, const bool useFullEditDistance,
            const int inputLength, const int spaceProximityPos, Correction *correction,
            WordsPriorityQueuePool* queuePool, const bool hasAutoCorrectionCandidate);
    void onTerminal(const int freq, const TerminalAttributes& terminalAttributes,
            Correction *correction, WordsPriorityQueuePool *queuePool, const bool addToMasterQueue,
            const int currentWordIndex);
@@ -127,13 +119,14 @@ class UnigramDictionary {
            ProximityInfo *proximityInfo, unsigned short *word);
    int getMostFrequentWordLikeInner(const uint16_t* const inWord, const int length,
            short unsigned int *outWord);
    int getSubStringSuggestion(
    bool getSubStringSuggestion(
            ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates,
            const int *codes, const bool useFullEditDistance, Correction *correction,
            WordsPriorityQueuePool* queuePool, const int inputLength,
            const bool hasAutoCorrectionCandidate, const int currentWordIndex,
            const int inputWordStartPos, const int inputWordLength,
            const int outputWordStartPos, unsigned short* outputWord, int *outputWordLength);
            const int outputWordStartPos, const bool isSpaceProximity, int *freqArray,
            int *wordLengthArray, unsigned short* outputWord, int *outputWordLength);

    const uint8_t* const DICT_ROOT;
    const int MAX_WORD_LENGTH;