Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 67e13976 authored by satok's avatar satok Committed by Android (Google) Code Review
Browse files

Merge "Store suggestions for each input length for missing space algorithm etc."

parents 9c283804 6ad15fcd
Loading
Loading
Loading
Loading
+15 −17
Original line number Original line Diff line number Diff line
@@ -214,21 +214,11 @@ int Correction::goDownTree(
    return mOutputIndex;
    return mOutputIndex;
}
}


// TODO: remove
// Accessor that returns the current value of mOutputIndex.
int Correction::getOutputIndex() {
    return mOutputIndex;
}

// TODO: remove
// TODO: remove
int Correction::getInputIndex() {
int Correction::getInputIndex() {
    return mInputIndex;
    return mInputIndex;
}
}


// TODO: remove
// Accessor that returns the current value of the mNeedsToTraverseAllNodes flag.
bool Correction::needsToTraverseAllNodes() {
    return mNeedsToTraverseAllNodes;
}

void Correction::incrementInputIndex() {
void Correction::incrementInputIndex() {
    ++mInputIndex;
    ++mInputIndex;
}
}
@@ -278,13 +268,12 @@ void Correction::addCharToCurrentWord(const int32_t c) {
            mWord, mOutputIndex + 1);
            mWord, mOutputIndex + 1);
}
}


// TODO: inline?
Correction::CorrectionType Correction::processSkipChar(
Correction::CorrectionType Correction::processSkipChar(
        const int32_t c, const bool isTerminal, const bool inputIndexIncremented) {
        const int32_t c, const bool isTerminal, const bool inputIndexIncremented) {
    addCharToCurrentWord(c);
    addCharToCurrentWord(c);
    if (needsToTraverseAllNodes() && isTerminal) {
    mTerminalInputIndex = mInputIndex - (inputIndexIncremented ? 1 : 0);
    mTerminalInputIndex = mInputIndex - (inputIndexIncremented ? 1 : 0);
    mTerminalOutputIndex = mOutputIndex;
    mTerminalOutputIndex = mOutputIndex;
    if (mNeedsToTraverseAllNodes && isTerminal) {
        incrementOutputIndex();
        incrementOutputIndex();
        return TRAVERSE_ALL_ON_TERMINAL;
        return TRAVERSE_ALL_ON_TERMINAL;
    } else {
    } else {
@@ -293,6 +282,13 @@ Correction::CorrectionType Correction::processSkipChar(
    }
    }
}
}


Correction::CorrectionType Correction::processUnrelatedCorrectionType() {
    // mTerminalInputIndex and mTerminalOutputIndex have to be set before any CorrectionType is
    // returned, so record the current input/output positions first and only then report
    // UNRELATED.
    mTerminalOutputIndex = mOutputIndex;
    mTerminalInputIndex = mInputIndex;
    return UNRELATED;
}

inline bool isEquivalentChar(ProximityInfo::ProximityType type) {
inline bool isEquivalentChar(ProximityInfo::ProximityType type) {
    return type == ProximityInfo::EQUIVALENT_CHAR;
    return type == ProximityInfo::EQUIVALENT_CHAR;
}
}
@@ -301,7 +297,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
        const int32_t c, const bool isTerminal) {
        const int32_t c, const bool isTerminal) {
    const int correctionCount = (mSkippedCount + mExcessiveCount + mTransposedCount);
    const int correctionCount = (mSkippedCount + mExcessiveCount + mTransposedCount);
    if (correctionCount > mMaxErrors) {
    if (correctionCount > mMaxErrors) {
        return UNRELATED;
        return processUnrelatedCorrectionType();
    }
    }


    // TODO: Change the limit if we'll allow two or more corrections
    // TODO: Change the limit if we'll allow two or more corrections
@@ -381,7 +377,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
                AKLOGI("UNRELATED(0): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
                AKLOGI("UNRELATED(0): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
                        mTransposedCount, mExcessiveCount, c);
                        mTransposedCount, mExcessiveCount, c);
            }
            }
            return UNRELATED;
            return processUnrelatedCorrectionType();
        }
        }
    }
    }


@@ -484,7 +480,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
                AKLOGI("UNRELATED(1): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
                AKLOGI("UNRELATED(1): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
                        mTransposedCount, mExcessiveCount, c);
                        mTransposedCount, mExcessiveCount, c);
            }
            }
            return UNRELATED;
            return processUnrelatedCorrectionType();
        }
        }
    } else if (secondTransposing) {
    } else if (secondTransposing) {
        // If inputIndex is greater than mInputLength, that means there is no
        // If inputIndex is greater than mInputLength, that means there is no
@@ -539,6 +535,8 @@ Correction::CorrectionType Correction::processCharAndCalcState(
        }
        }
        return ON_TERMINAL;
        return ON_TERMINAL;
    } else {
    } else {
        mTerminalInputIndex = mInputIndex - 1;
        mTerminalOutputIndex = mOutputIndex - 1;
        return NOT_ON_TERMINAL;
        return NOT_ON_TERMINAL;
    }
    }
}
}
+1 −2
Original line number Original line Diff line number Diff line
@@ -48,7 +48,6 @@ class Correction {
    void checkState();
    void checkState();
    bool initProcessState(const int index);
    bool initProcessState(const int index);


    int getOutputIndex();
    int getInputIndex();
    int getInputIndex();


    virtual ~Correction();
    virtual ~Correction();
@@ -115,11 +114,11 @@ class Correction {
 private:
 private:
    inline void incrementInputIndex();
    inline void incrementInputIndex();
    inline void incrementOutputIndex();
    inline void incrementOutputIndex();
    inline bool needsToTraverseAllNodes();
    inline void startToTraverseAllNodes();
    inline void startToTraverseAllNodes();
    inline bool isQuote(const unsigned short c);
    inline bool isQuote(const unsigned short c);
    inline CorrectionType processSkipChar(
    inline CorrectionType processSkipChar(
            const int32_t c, const bool isTerminal, const bool inputIndexIncremented);
            const int32_t c, const bool isTerminal, const bool inputIndexIncremented);
    inline CorrectionType processUnrelatedCorrectionType();
    inline void addCharToCurrentWord(const int32_t c);
    inline void addCharToCurrentWord(const int32_t c);


    const int TYPED_LETTER_MULTIPLIER;
    const int TYPED_LETTER_MULTIPLIER;
+16 −14
Original line number Original line Diff line number Diff line
@@ -22,9 +22,23 @@
#include <cutils/log.h>
#include <cutils/log.h>
#define AKLOGE ALOGE
#define AKLOGE ALOGE
#define AKLOGI ALOGI
#define AKLOGI ALOGI

#define DUMP_WORD(word, length) do { dumpWord(word, length); } while(0)

static char charBuf[50];

// Debug helper: copies the first `length` 16-bit units of `word` into the file-static charBuf
// (each unit is narrowed to a char), NUL-terminates the buffer and logs it via AKLOGI.
//
// `length` is clamped to [0, sizeof(charBuf) - 1] so that a negative or oversized length can
// never write outside charBuf; words longer than the buffer are logged truncated.
static void dumpWord(const unsigned short* word, const int length) {
    const int maxLength = static_cast<int>(sizeof(charBuf)) - 1;
    const int safeLength = length < 0 ? 0 : (length > maxLength ? maxLength : length);
    for (int i = 0; i < safeLength; ++i) {
        // Intentional narrowing: the buffer is only meant for human-readable debug output.
        charBuf[i] = static_cast<char>(word[i]);
    }
    charBuf[safeLength] = 0;
    AKLOGI("[ %s ]", charBuf);
}

#else
#else
#define AKLOGE(fmt, ...)
#define AKLOGE(fmt, ...)
#define AKLOGI(fmt, ...)
#define AKLOGI(fmt, ...)
#define DUMP_WORD(word, length)
#endif
#endif


#ifdef FLAG_DO_PROFILE
#ifdef FLAG_DO_PROFILE
@@ -106,18 +120,6 @@ static void prof_out(void) {
#define DEBUG_CORRECTION_FREQ true
#define DEBUG_CORRECTION_FREQ true
#define DEBUG_WORDS_PRIORITY_QUEUE true
#define DEBUG_WORDS_PRIORITY_QUEUE true


#define DUMP_WORD(word, length) do { dumpWord(word, length); } while(0)

static char charBuf[50];

// Debug helper: copies the first `length` 16-bit units of `word` into the file-static charBuf
// (each unit is narrowed to a char), NUL-terminates the buffer and logs it via AKLOGI.
//
// `length` is clamped to [0, sizeof(charBuf) - 1] so that a negative or oversized length can
// never write outside charBuf; words longer than the buffer are logged truncated.
static void dumpWord(const unsigned short* word, const int length) {
    const int maxLength = static_cast<int>(sizeof(charBuf)) - 1;
    const int safeLength = length < 0 ? 0 : (length > maxLength ? maxLength : length);
    for (int i = 0; i < safeLength; ++i) {
        // Intentional narrowing: the buffer is only meant for human-readable debug output.
        charBuf[i] = static_cast<char>(word[i]);
    }
    charBuf[safeLength] = 0;
    AKLOGI("[ %s ]", charBuf);
}

#else // FLAG_DBG
#else // FLAG_DBG


#define DEBUG_DICT false
#define DEBUG_DICT false
@@ -131,7 +133,6 @@ static void dumpWord(const unsigned short* word, const int length) {
#define DEBUG_CORRECTION_FREQ false
#define DEBUG_CORRECTION_FREQ false
#define DEBUG_WORDS_PRIORITY_QUEUE false
#define DEBUG_WORDS_PRIORITY_QUEUE false


#define DUMP_WORD(word, length)


#endif // FLAG_DBG
#endif // FLAG_DBG


@@ -207,7 +208,8 @@ static void dumpWord(const unsigned short* word, const int length) {


// Word limit for sub queues used in WordsPriorityQueuePool.  Sub queues are temporary queues used
// Word limit for sub queues used in WordsPriorityQueuePool.  Sub queues are temporary queues used
// for better performance.
// for better performance.
#define SUB_QUEUE_MAX_WORDS 5
// Holds up to 1 candidate for each word
#define SUB_QUEUE_MAX_WORDS 1
#define SUB_QUEUE_MAX_COUNT 10
#define SUB_QUEUE_MAX_COUNT 10


#define MAX_DEPTH_MULTIPLIER 3
#define MAX_DEPTH_MULTIPLIER 3
+41 −27
Original line number Original line Diff line number Diff line
@@ -186,7 +186,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,


    PROF_OPEN;
    PROF_OPEN;
    PROF_START(0);
    PROF_START(0);
    // Note: This line is intentionally left blank
    queuePool->clearAll();
    PROF_END(0);
    PROF_END(0);


    PROF_START(1);
    PROF_START(1);
@@ -241,18 +241,17 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
        }
        }
    }
    }
    PROF_END(6);
    PROF_END(6);
    if (DEBUG_WORDS_PRIORITY_QUEUE) {
        queuePool->dumpSubQueue1TopSuggestions();
    }
}
}


void UnigramDictionary::initSuggestions(ProximityInfo *proximityInfo, const int *xCoordinates,
void UnigramDictionary::initSuggestions(ProximityInfo *proximityInfo, const int *xCoordinates,
        const int *yCoordinates, const int *codes, const int inputLength,
        const int *yCoordinates, const int *codes, const int inputLength, Correction *correction) {
        WordsPriorityQueue *queue, Correction *correction) {
    if (DEBUG_DICT) {
    if (DEBUG_DICT) {
        AKLOGI("initSuggest");
        AKLOGI("initSuggest");
    }
    }
    proximityInfo->setInputParams(codes, inputLength, xCoordinates, yCoordinates);
    proximityInfo->setInputParams(codes, inputLength, xCoordinates, yCoordinates);
    if (queue) {
        queue->clear();
    }
    const int maxDepth = min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
    const int maxDepth = min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
    correction->initCorrection(proximityInfo, inputLength, maxDepth);
    correction->initCorrection(proximityInfo, inputLength, maxDepth);
}
}
@@ -264,15 +263,13 @@ void UnigramDictionary::getOneWordSuggestions(ProximityInfo *proximityInfo,
        const int *xcoordinates, const int *ycoordinates, const int *codes,
        const int *xcoordinates, const int *ycoordinates, const int *codes,
        const bool useFullEditDistance, const int inputLength, Correction *correction,
        const bool useFullEditDistance, const int inputLength, Correction *correction,
        WordsPriorityQueuePool *queuePool) {
        WordsPriorityQueuePool *queuePool) {
    WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
    initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, correction);
    initSuggestions(
    getSuggestionCandidates(useFullEditDistance, inputLength, correction, queuePool,
            proximityInfo, xcoordinates, ycoordinates, codes, inputLength, masterQueue, correction);
    getSuggestionCandidates(useFullEditDistance, inputLength, correction, masterQueue,
            true /* doAutoCompletion */, DEFAULT_MAX_ERRORS);
            true /* doAutoCompletion */, DEFAULT_MAX_ERRORS);
}
}


void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance,
void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance,
        const int inputLength, Correction *correction, WordsPriorityQueue *queue,
        const int inputLength, Correction *correction, WordsPriorityQueuePool *queuePool,
        const bool doAutoCompletion, const int maxErrors) {
        const bool doAutoCompletion, const int maxErrors) {
    // TODO: Remove setCorrectionParams
    // TODO: Remove setCorrectionParams
    correction->setCorrectionParams(0, 0, 0,
    correction->setCorrectionParams(0, 0, 0,
@@ -292,7 +289,7 @@ void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance,
            int firstChildPos;
            int firstChildPos;


            const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos,
            const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos,
                    correction, &childCount, &firstChildPos, &siblingPos, queue);
                    correction, &childCount, &firstChildPos, &siblingPos, queuePool);
            // Update next sibling pos
            // Update next sibling pos
            correction->setTreeSiblingPos(outputIndex, siblingPos);
            correction->setTreeSiblingPos(outputIndex, siblingPos);


@@ -327,14 +324,34 @@ void UnigramDictionary::getMistypedSpaceWords(ProximityInfo *proximityInfo, cons


inline void UnigramDictionary::onTerminal(const int freq,
inline void UnigramDictionary::onTerminal(const int freq,
        const TerminalAttributes& terminalAttributes, Correction *correction,
        const TerminalAttributes& terminalAttributes, Correction *correction,
        WordsPriorityQueue *queue) {
        WordsPriorityQueuePool *queuePool, const bool addToMasterQueue) {
    const int inputIndex = correction->getInputIndex();
    const bool addToSubQueue = inputIndex < SUB_QUEUE_MAX_COUNT;
    if (!addToMasterQueue && !addToSubQueue) {
        return;
    }
    WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
    WordsPriorityQueue *subQueue = queuePool->getSubQueue1(inputIndex);
    int wordLength;
    int wordLength;
    unsigned short* wordPointer;
    unsigned short* wordPointer;
    const int finalFreq = correction->getFinalFreq(freq, &wordPointer, &wordLength);
    const int finalFreq = correction->getFinalFreq(freq, &wordPointer, &wordLength);
    if (finalFreq >= 0) {
    if (finalFreq >= 0) {
        if (!terminalAttributes.isShortcutOnly()) {
        if (!terminalAttributes.isShortcutOnly()) {
            addWord(wordPointer, wordLength, finalFreq, queue);
            if (addToMasterQueue) {
                addWord(wordPointer, wordLength, finalFreq, masterQueue);
            }
            }
            // TODO: Check the validity of "inputIndex == wordLength"
            //if (addToSubQueue && inputIndex == wordLength) {
            if (addToSubQueue) {
                addWord(wordPointer, wordLength, finalFreq, subQueue);
            }
        }
        // Please note that the shortcut candidates will be added to the master queue only.
        if (!addToMasterQueue) {
            return;
        }

        // From here, below is the code to add shortcut candidates.
        TerminalAttributes::ShortcutIterator iterator = terminalAttributes.getShortcutIterator();
        TerminalAttributes::ShortcutIterator iterator = terminalAttributes.getShortcutIterator();
        while (iterator.hasNextShortcutTarget()) {
        while (iterator.hasNextShortcutTarget()) {
            // TODO: addWord only supports weak ordering, meaning we have no means to control the
            // TODO: addWord only supports weak ordering, meaning we have no means to control the
@@ -345,7 +362,7 @@ inline void UnigramDictionary::onTerminal(const int freq,
            uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL];
            uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL];
            const int shortcutTargetStringLength = iterator.getNextShortcutTarget(
            const int shortcutTargetStringLength = iterator.getNextShortcutTarget(
                    MAX_WORD_LENGTH_INTERNAL, shortcutTarget);
                    MAX_WORD_LENGTH_INTERNAL, shortcutTarget);
            addWord(shortcutTarget, shortcutTargetStringLength, finalFreq, queue);
            addWord(shortcutTarget, shortcutTargetStringLength, finalFreq, masterQueue);
        }
        }
    }
    }
}
}
@@ -411,8 +428,7 @@ void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo
    }
    }


    // TODO: Remove initSuggestions and correction->setCorrectionParams
    // TODO: Remove initSuggestions and correction->setCorrectionParams
    initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength,
    initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, correction);
            0 /* do not clear queue */, correction);


    correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */,
    correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */,
            -1 /* transposedPos */, spaceProximityPos, missingSpacePos,
            -1 /* transposedPos */, spaceProximityPos, missingSpacePos,
@@ -584,7 +600,7 @@ int UnigramDictionary::getBigramPosition(int pos, unsigned short *word, int offs
// given level, as output into newCount when traversing this level's parent.
// given level, as output into newCount when traversing this level's parent.
inline bool UnigramDictionary::processCurrentNode(const int initialPos,
inline bool UnigramDictionary::processCurrentNode(const int initialPos,
        Correction *correction, int *newCount,
        Correction *correction, int *newCount,
        int *newChildrenPosition, int *nextSiblingPosition, WordsPriorityQueue *queue) {
        int *newChildrenPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool) {
    if (DEBUG_DICT) {
    if (DEBUG_DICT) {
        correction->checkState();
        correction->checkState();
    }
    }
@@ -659,15 +675,13 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
    } while (NOT_A_CHARACTER != c);
    } while (NOT_A_CHARACTER != c);


    if (isTerminalNode) {
    if (isTerminalNode) {
        if (needsToInvokeOnTerminal) {
        // The frequency should be here, because we come here only if this is actually
        // The frequency should be here, because we come here only if this is actually
        // a terminal node, and we are on its last char.
        // a terminal node, and we are on its last char.
        const int freq = BinaryFormat::readFrequencyWithoutMovingPointer(DICT_ROOT, pos);
        const int freq = BinaryFormat::readFrequencyWithoutMovingPointer(DICT_ROOT, pos);
        const int childrenAddressPos = BinaryFormat::skipFrequency(flags, pos);
        const int childrenAddressPos = BinaryFormat::skipFrequency(flags, pos);
        const int attributesPos = BinaryFormat::skipChildrenPosition(flags, childrenAddressPos);
        const int attributesPos = BinaryFormat::skipChildrenPosition(flags, childrenAddressPos);
        TerminalAttributes terminalAttributes(DICT_ROOT, flags, attributesPos);
        TerminalAttributes terminalAttributes(DICT_ROOT, flags, attributesPos);
            onTerminal(freq, terminalAttributes, correction, queue);
        onTerminal(freq, terminalAttributes, correction, queuePool, needsToInvokeOnTerminal);
        }


        // If there are more chars in this node, then this virtual node has children.
        // If there are more chars in this node, then this virtual node has children.
        // If we are on the last char, this virtual node has children if this node has.
        // If we are on the last char, this virtual node has children if this node has.
+4 −5
Original line number Original line Diff line number Diff line
@@ -93,14 +93,13 @@ class UnigramDictionary {
        const int codesRemain, const int currentDepth, int* codesDest, Correction *correction,
        const int codesRemain, const int currentDepth, int* codesDest, Correction *correction,
        WordsPriorityQueuePool* queuePool);
        WordsPriorityQueuePool* queuePool);
    void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
    void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
            const int *ycoordinates, const int *codes, const int codesSize,
            const int *ycoordinates, const int *codes, const int codesSize, Correction *correction);
            WordsPriorityQueue *queue, Correction *correction);
    void getOneWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
    void getOneWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
            const int *ycoordinates, const int *codes, const bool useFullEditDistance,
            const int *ycoordinates, const int *codes, const bool useFullEditDistance,
            const int inputLength, Correction *correction, WordsPriorityQueuePool* queuePool);
            const int inputLength, Correction *correction, WordsPriorityQueuePool* queuePool);
    void getSuggestionCandidates(
    void getSuggestionCandidates(
            const bool useFullEditDistance, const int inputLength, Correction *correction,
            const bool useFullEditDistance, const int inputLength, Correction *correction,
            WordsPriorityQueue* queue, const bool doAutoCompletion, const int maxErrors);
            WordsPriorityQueuePool* queuePool, const bool doAutoCompletion, const int maxErrors);
    void getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo,
    void getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo,
            const int *xcoordinates, const int *ycoordinates, const int *codes,
            const int *xcoordinates, const int *ycoordinates, const int *codes,
            const bool useFullEditDistance, const int inputLength, const int spaceProximityPos,
            const bool useFullEditDistance, const int inputLength, const int spaceProximityPos,
@@ -114,12 +113,12 @@ class UnigramDictionary {
            const int inputLength, const int spaceProximityPos, Correction *correction,
            const int inputLength, const int spaceProximityPos, Correction *correction,
            WordsPriorityQueuePool* queuePool);
            WordsPriorityQueuePool* queuePool);
    void onTerminal(const int freq, const TerminalAttributes& terminalAttributes,
    void onTerminal(const int freq, const TerminalAttributes& terminalAttributes,
            Correction *correction, WordsPriorityQueue *queue);
            Correction *correction, WordsPriorityQueuePool *queuePool, const bool addToMasterQueue);
    bool needsToSkipCurrentNode(const unsigned short c,
    bool needsToSkipCurrentNode(const unsigned short c,
            const int inputIndex, const int skipPos, const int depth);
            const int inputIndex, const int skipPos, const int depth);
    // Process a node by considering proximity, missing and excessive character
    // Process a node by considering proximity, missing and excessive character
    bool processCurrentNode(const int initialPos, Correction *correction, int *newCount,
    bool processCurrentNode(const int initialPos, Correction *correction, int *newCount,
            int *newChildPosition, int *nextSiblingPosition, WordsPriorityQueue *queue);
            int *newChildPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool);
    int getMostFrequentWordLike(const int startInputIndex, const int inputLength,
    int getMostFrequentWordLike(const int startInputIndex, const int inputLength,
            ProximityInfo *proximityInfo, unsigned short *word);
            ProximityInfo *proximityInfo, unsigned short *word);
    int getMostFrequentWordLikeInner(const uint16_t* const inWord, const int length,
    int getMostFrequentWordLikeInner(const uint16_t* const inWord, const int length,
Loading