Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 9955716d authored by satok
Browse files

Merge missing space and mistyped space correction algorithm

Change-Id: Idd64d38d3d29be24748f9c0359667883698a5756
parent 5971a0a0
Loading
Loading
Loading
Loading
+10 −19
Original line number Diff line number Diff line
@@ -158,10 +158,10 @@ void Correction::checkState() {
    }
}

int Correction::getFreqForSplitTwoWords(const int firstFreq, const int secondFreq,
        const unsigned short *word) {
    return Correction::RankingAlgorithm::calcFreqForSplitTwoWords(
            firstFreq, secondFreq, this, word);
int Correction::getFreqForSplitTwoWords(const int *freqArray, const int *wordLengthArray,
        const bool isSpaceProximity, const unsigned short *word) {
    return Correction::RankingAlgorithm::calcFreqForSplitTwoWords(freqArray, wordLengthArray, this,
            isSpaceProximity, word);
}

int Correction::getFinalFreq(const int freq, unsigned short **word, int *wordLength) {
@@ -806,21 +806,12 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const

/* static */
int Correction::RankingAlgorithm::calcFreqForSplitTwoWords(
        const int firstFreq, const int secondFreq, const Correction* correction,
        const unsigned short *word) {
    const int spaceProximityPos = correction->mSpaceProximityPos;
    const int missingSpacePos = correction->mMissingSpacePos;
    if (DEBUG_DICT) {
        int inputCount = 0;
        if (spaceProximityPos >= 0) ++inputCount;
        if (missingSpacePos >= 0) ++inputCount;
        assert(inputCount <= 1);
    }
    const bool isSpaceProximity = spaceProximityPos >= 0;
    const int inputLength = correction->mInputLength;
    const int firstWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos;
    const int secondWordLength = isSpaceProximity ? (inputLength - spaceProximityPos - 1)
            : (inputLength - missingSpacePos);
        const int *freqArray, const int *wordLengthArray, const Correction* correction,
        const bool isSpaceProximity, const unsigned short *word) {
    const int firstFreq = freqArray[0];
    const int secondFreq = freqArray[1];
    const int firstWordLength = wordLengthArray[0];
    const int secondWordLength = wordLengthArray[1];
    const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER;

    bool firstCapitalizedWordDemotion = false;
+5 −3
Original line number Diff line number Diff line
@@ -122,7 +122,8 @@ class Correction {
    bool needsToPrune() const;

    int getFreqForSplitTwoWords(
            const int firstFreq, const int secondFreq, const unsigned short *word);
            const int *freqArray, const int *wordLengthArray, const bool isSpaceProximity,
            const unsigned short *word);
    int getFinalFreq(const int freq, unsigned short **word, int* wordLength);
    int getFinalFreqForSubQueue(const int freq, unsigned short **word, int* wordLength,
            const int inputLength);
@@ -150,8 +151,9 @@ class Correction {
        static int calculateFinalFreq(const int inputIndex, const int depth,
                const int freq, int *editDistanceTable, const Correction* correction,
                const int inputLength);
        static int calcFreqForSplitTwoWords(const int firstFreq, const int secondFreq,
                const Correction* correction, const unsigned short *word);
        static int calcFreqForSplitTwoWords(const int *freqArray, const int *wordLengthArray,
                const Correction* correction, const bool isSpaceProximity,
                const unsigned short *word);
        static double calcNormalizedScore(const unsigned short* before, const int beforeLength,
                const unsigned short* after, const int afterLength, const int score);
        static int editDistance(const unsigned short* before,
+2 −3
Original line number Diff line number Diff line
@@ -180,10 +180,9 @@ static void prof_out(void) {
#define CALIBRATE_SCORE_BY_TOUCH_COORDINATES true

#define SUGGEST_WORDS_WITH_MISSING_CHARACTER true
#define SUGGEST_WORDS_WITH_MISSING_SPACE_CHARACTER true
#define SUGGEST_WORDS_WITH_EXCESSIVE_CHARACTER true
#define SUGGEST_WORDS_WITH_TRANSPOSED_CHARACTERS true
#define SUGGEST_WORDS_WITH_SPACE_PROXIMITY true
#define SUGGEST_MULTIPLE_WORDS true

// The following "rate"s are used as a multiplier before dividing by 100, so they are in percent.
#define WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE 80
@@ -233,7 +232,7 @@ static void prof_out(void) {

// Minimum suggest depth for one word for all cases except for missing space suggestions.
#define MIN_SUGGEST_DEPTH 1
#define MIN_USER_TYPED_LENGTH_FOR_MISSING_SPACE_SUGGESTION 3
#define MIN_USER_TYPED_LENGTH_FOR_MULTIPLE_WORD_SUGGESTION 3
#define MIN_USER_TYPED_LENGTH_FOR_EXCESSIVE_CHARACTER_SUGGESTION 3

#define min(a,b) ((a)<(b)?(a):(b))
+71 −87
Original line number Diff line number Diff line
@@ -211,7 +211,6 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
    PROF_END(3);

    PROF_START(4);
    // Note: This line is intentionally left blank
    bool hasAutoCorrectionCandidate = false;
    WordsPriorityQueue* masterQueue = queuePool->getMasterQueue();
    if (masterQueue->size() > 0) {
@@ -222,14 +221,14 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
    PROF_END(4);

    PROF_START(5);
    // Suggestions with missing space
    if (SUGGEST_WORDS_WITH_MISSING_SPACE_CHARACTER
            && inputLength >= MIN_USER_TYPED_LENGTH_FOR_MISSING_SPACE_SUGGESTION) {
    // Multiple word suggestions
    if (SUGGEST_MULTIPLE_WORDS
            && inputLength >= MIN_USER_TYPED_LENGTH_FOR_MULTIPLE_WORD_SUGGESTION) {
        for (int i = 1; i < inputLength; ++i) {
            if (DEBUG_DICT) {
                AKLOGI("--- Suggest missing space characters %d", i);
                AKLOGI("--- Suggest multiple words %d", i);
            }
            getMissingSpaceWords(proximityInfo, xcoordinates, ycoordinates, codes,
            getSplitTwoWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
                    useFullEditDistance, inputLength, i, correction, queuePool,
                    hasAutoCorrectionCandidate);
        }
@@ -237,26 +236,9 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
    PROF_END(5);

    PROF_START(6);
    if (SUGGEST_WORDS_WITH_SPACE_PROXIMITY && proximityInfo) {
        // The first and last "mistyped spaces" are taken care of by excessive character handling
        for (int i = 1; i < inputLength - 1; ++i) {
            if (DEBUG_DICT) {
                AKLOGI("--- Suggest words with proximity space %d", i);
            }
            const int x = xcoordinates[i];
            const int y = ycoordinates[i];
            if (DEBUG_PROXIMITY_INFO) {
                AKLOGI("Input[%d] x = %d, y = %d, has space proximity = %d",
                        i, x, y, proximityInfo->hasSpaceProximity(x, y));
            }
            if (proximityInfo->hasSpaceProximity(x, y)) {
                getMistypedSpaceWords(proximityInfo, xcoordinates, ycoordinates, codes,
                        useFullEditDistance, inputLength, i, correction, queuePool,
                        hasAutoCorrectionCandidate);
            }
        }
    }
    // Note: This line is intentionally left blank
    PROF_END(6);

    if (DEBUG_DICT) {
        queuePool->dumpSubQueue1TopSuggestions();
        for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) {
@@ -337,24 +319,6 @@ void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance,
    }
}

// Thin forwarding wrapper around getSplitTwoWordsSuggestions() for the "missing space" case.
// Every argument is passed through unchanged; missingSpacePos is supplied as the split
// position and -1 is supplied in the spaceProximityPos slot.
// NOTE(review): -1 presumably means "no mistyped-space position" -- confirm against the
// callee's handling of negative positions.
void UnigramDictionary::getMissingSpaceWords(ProximityInfo *proximityInfo, const int *xcoordinates,
        const int *ycoordinates, const int *codes, const bool useFullEditDistance,
        const int inputLength, const int missingSpacePos, Correction *correction,
        WordsPriorityQueuePool* queuePool, const bool hasAutoCorrectionCandidate) {
    getSplitTwoWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
            useFullEditDistance, inputLength, missingSpacePos, -1/* spaceProximityPos */,
            correction, queuePool, hasAutoCorrectionCandidate);
}

// Thin forwarding wrapper around getSplitTwoWordsSuggestions() for the "mistyped space" case.
// Every argument is passed through unchanged; -1 is supplied in the missingSpacePos slot and
// spaceProximityPos is supplied as the position of the key assumed to be a mistyped space.
// NOTE(review): -1 presumably means "no missing-space position" -- confirm against the
// callee's handling of negative positions.
void UnigramDictionary::getMistypedSpaceWords(ProximityInfo *proximityInfo, const int *xcoordinates,
        const int *ycoordinates, const int *codes, const bool useFullEditDistance,
        const int inputLength, const int spaceProximityPos, Correction *correction,
        WordsPriorityQueuePool* queuePool, const bool hasAutoCorrectionCandidate) {
    getSplitTwoWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
            useFullEditDistance, inputLength, -1 /* missingSpacePos */, spaceProximityPos,
            correction, queuePool, hasAutoCorrectionCandidate);
}

inline void UnigramDictionary::onTerminal(const int freq,
        const TerminalAttributes& terminalAttributes, Correction *correction,
        WordsPriorityQueuePool *queuePool, const bool addToMasterQueue,
@@ -405,15 +369,23 @@ inline void UnigramDictionary::onTerminal(const int freq,
    }
}

int UnigramDictionary::getSubStringSuggestion(
bool UnigramDictionary::getSubStringSuggestion(
        ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates,
        const int *codes, const bool useFullEditDistance, Correction *correction,
        WordsPriorityQueuePool* queuePool, const int inputLength,
        const bool hasAutoCorrectionCandidate, const int currentWordIndex,
        const int inputWordStartPos, const int inputWordLength,
        const int outputWordStartPos, unsigned short* outputWord, int *outputWordLength) {
        const int outputWordStartPos, const bool isSpaceProximity, int *freqArray,
        int*wordLengthArray, unsigned short* outputWord, int *outputWordLength) {
    if (DEBUG_DICT) {
        assert(currentWordIndex >= 1);
    }
    unsigned short* tempOutputWord = 0;
    int tempOutputWordLength = 0;
    // TODO: Optimize init suggestion
    initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
            inputLength, correction);

    int freq = getMostFrequentWordLike(
            inputWordStartPos, inputWordLength, proximityInfo, mWord);
    if (freq > 0) {
@@ -438,7 +410,7 @@ int UnigramDictionary::getSubStringSuggestion(
        }
        WordsPriorityQueue* queue = queuePool->getSubQueue(currentWordIndex, inputWordLength);
        if (!queue || queue->size() < 1) {
            return 0;
            return false;
        }
        int score = 0;
        const double ns = queue->getHighestNormalizedScore(
@@ -451,93 +423,105 @@ int UnigramDictionary::getSubStringSuggestion(
        // threshold.
        if (ns < TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD
                || tempOutputWordLength < SUB_QUEUE_MIN_WORD_LENGTH) {
            return 0;
            return false;
        }
        freq = score >> (tempOutputWordLength
                + TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER);
    }
    if (DEBUG_DICT) {
        AKLOGI("Freq(%d): %d", currentWordIndex, freq);
        AKLOGI("Freq(%d): %d, length: %d, input length: %d, input start: %d"
                , currentWordIndex, freq, tempOutputWordLength, inputWordLength, inputWordStartPos);
    }
    if (freq <= 0 || tempOutputWordLength <= 0
            || MAX_WORD_LENGTH <= (outputWordStartPos + tempOutputWordLength)) {
        return 0;
        return false;
    }
    for (int i = 0; i < tempOutputWordLength; ++i) {
        outputWord[outputWordStartPos + i] = tempOutputWord[i];
    }

    // Put output values
    freqArray[currentWordIndex - 1] = freq;
    // TODO: put output length instead of input length
    wordLengthArray[currentWordIndex - 1] = inputWordLength;
    *outputWordLength = outputWordStartPos + tempOutputWordLength;

    if ((inputWordStartPos + inputWordLength) < inputLength) {
        if (outputWordStartPos + tempOutputWordLength >= MAX_WORD_LENGTH) {
            return 0;
            return false;
        }
        outputWord[outputWordStartPos + tempOutputWordLength] = SPACE;
        ++tempOutputWordLength;
        ++*outputWordLength;
    } else if (currentWordIndex >= 2) {
        // TODO: Handle 3 or more words
        const int pairFreq = correction->getFreqForSplitTwoWords(
                freqArray, wordLengthArray, isSpaceProximity, outputWord);
        if (DEBUG_DICT) {
            AKLOGI("Split two words: %d, %d, %d, %d", freqArray[0], freqArray[1], pairFreq,
                    inputLength);
        }
    *outputWordLength = outputWordStartPos + tempOutputWordLength;
    return freq;
        addWord(outputWord, *outputWordLength, pairFreq, queuePool->getMasterQueue());
    }
    return true;
}

void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo,
        const int *xcoordinates, const int *ycoordinates, const int *codes,
        const bool useFullEditDistance, const int inputLength, const int missingSpacePos,
        const int  spaceProximityPos, Correction *correction, WordsPriorityQueuePool* queuePool,
        const bool useFullEditDistance, const int inputLength, const int wordDivideIndex,
        Correction *correction, WordsPriorityQueuePool* queuePool,
        const bool hasAutoCorrectionCandidate) {
    if (inputLength >= MAX_WORD_LENGTH) return;
    if (DEBUG_DICT) {
        int inputCount = 0;
        if (spaceProximityPos >= 0) ++inputCount;
        if (missingSpacePos >= 0) ++inputCount;
        assert(inputCount <= 1);
        // MAX_PROXIMITY_CHARS_SIZE in ProximityInfo.java should be 16
        assert(MAX_PROXIMITY_CHARS == 16);
    }

    initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes,
            inputLength, correction);

    // Allocating fixed length array on stack
    unsigned short outputWord[MAX_WORD_LENGTH];
    int freqArray[SUB_QUEUE_MAX_WORD_INDEX];
    int wordLengthArray[SUB_QUEUE_MAX_WORD_INDEX];
    int outputWordLength = 0;

    WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
    const bool isSpaceProximity = spaceProximityPos >= 0;

    // First word
    int inputWordStartPos = 0;
    int inputWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos;
    const int firstFreq = getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
    int inputWordLength = wordDivideIndex;
    if (!getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
            useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate,
            FIRST_WORD_INDEX, inputWordStartPos, inputWordLength, 0, outputWord, &outputWordLength);
    if (firstFreq <= 0) {
            FIRST_WORD_INDEX, inputWordStartPos, inputWordLength, 0, true /* not used */,
            freqArray, wordLengthArray, outputWord, &outputWordLength)) {
        return;
    }

    const int tempOutputWordLength = outputWordLength;
    // Second word
    inputWordStartPos = isSpaceProximity ? (spaceProximityPos + 1) : missingSpacePos;
    inputWordLength = isSpaceProximity ? (inputLength - spaceProximityPos - 1)
            : (inputLength - missingSpacePos);
    const int secondFreq = getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
    // Missing space
    inputWordStartPos = wordDivideIndex;
    inputWordLength = inputLength - wordDivideIndex;
    getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
            useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate,
            SECOND_WORD_INDEX, inputWordStartPos, inputWordLength, outputWordLength, outputWord,
            &outputWordLength);
    if (secondFreq <= 0) {
        return;
    }
            SECOND_WORD_INDEX, inputWordStartPos, inputWordLength, tempOutputWordLength,
            false /* missing space */, freqArray, wordLengthArray, outputWord, &outputWordLength);

    // TODO: Remove initSuggestions and correction->setCorrectionParams
    initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, correction);
    // Mistyped space
    ++inputWordStartPos;
    --inputWordLength;

    correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */,
            -1 /* transposedPos */, spaceProximityPos, missingSpacePos,
            useFullEditDistance, false /* doAutoCompletion */, MAX_ERRORS_FOR_TWO_WORDS);
    const int pairFreq = correction->getFreqForSplitTwoWords(firstFreq, secondFreq, outputWord);
    if (DEBUG_DICT) {
        AKLOGI("Split two words:  %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength);
    if (inputWordLength <= 0) {
        return;
    }
    addWord(outputWord, outputWordLength, pairFreq, masterQueue);

    const int x = xcoordinates[inputWordStartPos - 1];
    const int y = ycoordinates[inputWordStartPos - 1];
    if (!proximityInfo->hasSpaceProximity(x, y)) {
        return;
    }

    getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes,
            useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate,
            SECOND_WORD_INDEX, inputWordStartPos, inputWordLength, tempOutputWordLength,
            true /* mistyped space */, freqArray, wordLengthArray, outputWord, &outputWordLength);
}

// Wrapper for getMostFrequentWordLikeInner, which matches it to the previous
// interface.
inline int UnigramDictionary::getMostFrequentWordLike(const int startInputIndex,
+5 −12
Original line number Diff line number Diff line
@@ -103,17 +103,9 @@ class UnigramDictionary {
            const int currentWordIndex);
    void getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo,
            const int *xcoordinates, const int *ycoordinates, const int *codes,
            const bool useFullEditDistance, const int inputLength, const int spaceProximityPos,
            const int missingSpacePos, Correction *correction, WordsPriorityQueuePool* queuePool,
            const bool useFullEditDistance, const int inputLength, const int wordDivideIndex,
            Correction *correction, WordsPriorityQueuePool* queuePool,
            const bool hasAutoCorrectionCandidate);
    void getMissingSpaceWords(ProximityInfo *proximityInfo, const int *xcoordinates,
            const int *ycoordinates, const int *codes, const bool useFullEditDistance,
            const int inputLength, const int missingSpacePos, Correction *correction,
            WordsPriorityQueuePool* queuePool, const bool hasAutoCorrectionCandidate);
    void getMistypedSpaceWords(ProximityInfo *proximityInfo, const int *xcoordinates,
            const int *ycoordinates, const int *codes, const bool useFullEditDistance,
            const int inputLength, const int spaceProximityPos, Correction *correction,
            WordsPriorityQueuePool* queuePool, const bool hasAutoCorrectionCandidate);
    void onTerminal(const int freq, const TerminalAttributes& terminalAttributes,
            Correction *correction, WordsPriorityQueuePool *queuePool, const bool addToMasterQueue,
            const int currentWordIndex);
@@ -127,13 +119,14 @@ class UnigramDictionary {
            ProximityInfo *proximityInfo, unsigned short *word);
    int getMostFrequentWordLikeInner(const uint16_t* const inWord, const int length,
            short unsigned int *outWord);
    int getSubStringSuggestion(
    bool getSubStringSuggestion(
            ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates,
            const int *codes, const bool useFullEditDistance, Correction *correction,
            WordsPriorityQueuePool* queuePool, const int inputLength,
            const bool hasAutoCorrectionCandidate, const int currentWordIndex,
            const int inputWordStartPos, const int inputWordLength,
            const int outputWordStartPos, unsigned short* outputWord, int *outputWordLength);
            const int outputWordStartPos, const bool isSpaceProximity, int *freqArray,
            int *wordLengthArray, unsigned short* outputWord, int *outputWordLength);

    const uint8_t* const DICT_ROOT;
    const int MAX_WORD_LENGTH;