Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 6ad15fcd authored by satok's avatar satok
Browse files

Store suggestions for each input length for missing space algorithm etc.

Change-Id: Ief8f6ddd29e043744863e5b9be3a51a70987291c
parent 327c6828
Loading
Loading
Loading
Loading
+15 −17
Original line number Diff line number Diff line
@@ -214,21 +214,11 @@ int Correction::goDownTree(
    return mOutputIndex;
}

// TODO: remove
int Correction::getOutputIndex() {
    return mOutputIndex;
}

// TODO: remove
int Correction::getInputIndex() {
    return mInputIndex;
}

// TODO: remove
bool Correction::needsToTraverseAllNodes() {
    return mNeedsToTraverseAllNodes;
}

void Correction::incrementInputIndex() {
    ++mInputIndex;
}
@@ -278,13 +268,12 @@ void Correction::addCharToCurrentWord(const int32_t c) {
            mWord, mOutputIndex + 1);
}

// TODO: inline?
Correction::CorrectionType Correction::processSkipChar(
        const int32_t c, const bool isTerminal, const bool inputIndexIncremented) {
    addCharToCurrentWord(c);
    if (needsToTraverseAllNodes() && isTerminal) {
    mTerminalInputIndex = mInputIndex - (inputIndexIncremented ? 1 : 0);
    mTerminalOutputIndex = mOutputIndex;
    if (mNeedsToTraverseAllNodes && isTerminal) {
        incrementOutputIndex();
        return TRAVERSE_ALL_ON_TERMINAL;
    } else {
@@ -293,6 +282,13 @@ Correction::CorrectionType Correction::processSkipChar(
    }
}

Correction::CorrectionType Correction::processUnrelatedCorrectionType() {
    // Needs to set mTerminalInputIndex and mTerminalOutputIndex before returning any CorrectionType
    mTerminalInputIndex = mInputIndex;
    mTerminalOutputIndex = mOutputIndex;
    return UNRELATED;
}

inline bool isEquivalentChar(ProximityInfo::ProximityType type) {
    return type == ProximityInfo::EQUIVALENT_CHAR;
}
@@ -301,7 +297,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
        const int32_t c, const bool isTerminal) {
    const int correctionCount = (mSkippedCount + mExcessiveCount + mTransposedCount);
    if (correctionCount > mMaxErrors) {
        return UNRELATED;
        return processUnrelatedCorrectionType();
    }

    // TODO: Change the limit if we'll allow two or more corrections
@@ -381,7 +377,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
                AKLOGI("UNRELATED(0): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
                        mTransposedCount, mExcessiveCount, c);
            }
            return UNRELATED;
            return processUnrelatedCorrectionType();
        }
    }

@@ -484,7 +480,7 @@ Correction::CorrectionType Correction::processCharAndCalcState(
                AKLOGI("UNRELATED(1): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount,
                        mTransposedCount, mExcessiveCount, c);
            }
            return UNRELATED;
            return processUnrelatedCorrectionType();
        }
    } else if (secondTransposing) {
        // If inputIndex is greater than mInputLength, that means there is no
@@ -539,6 +535,8 @@ Correction::CorrectionType Correction::processCharAndCalcState(
        }
        return ON_TERMINAL;
    } else {
        mTerminalInputIndex = mInputIndex - 1;
        mTerminalOutputIndex = mOutputIndex - 1;
        return NOT_ON_TERMINAL;
    }
}
+1 −2
Original line number Diff line number Diff line
@@ -48,7 +48,6 @@ class Correction {
    void checkState();
    bool initProcessState(const int index);

    int getOutputIndex();
    int getInputIndex();

    virtual ~Correction();
@@ -115,11 +114,11 @@ class Correction {
 private:
    inline void incrementInputIndex();
    inline void incrementOutputIndex();
    inline bool needsToTraverseAllNodes();
    inline void startToTraverseAllNodes();
    inline bool isQuote(const unsigned short c);
    inline CorrectionType processSkipChar(
            const int32_t c, const bool isTerminal, const bool inputIndexIncremented);
    inline CorrectionType processUnrelatedCorrectionType();
    inline void addCharToCurrentWord(const int32_t c);

    const int TYPED_LETTER_MULTIPLIER;
+16 −14
Original line number Diff line number Diff line
@@ -22,9 +22,23 @@
#include <cutils/log.h>
#define AKLOGE ALOGE
#define AKLOGI ALOGI

#define DUMP_WORD(word, length) do { dumpWord(word, length); } while(0)

static char charBuf[50];

static void dumpWord(const unsigned short* word, const int length) {
    for (int i = 0; i < length; ++i) {
        charBuf[i] = word[i];
    }
    charBuf[length] = 0;
    AKLOGI("[ %s ]", charBuf);
}

#else
#define AKLOGE(fmt, ...)
#define AKLOGI(fmt, ...)
#define DUMP_WORD(word, length)
#endif

#ifdef FLAG_DO_PROFILE
@@ -106,18 +120,6 @@ static void prof_out(void) {
#define DEBUG_CORRECTION_FREQ true
#define DEBUG_WORDS_PRIORITY_QUEUE true

#define DUMP_WORD(word, length) do { dumpWord(word, length); } while(0)

static char charBuf[50];

static void dumpWord(const unsigned short* word, const int length) {
    for (int i = 0; i < length; ++i) {
        charBuf[i] = word[i];
    }
    charBuf[length] = 0;
    AKLOGI("[ %s ]", charBuf);
}

#else // FLAG_DBG

#define DEBUG_DICT false
@@ -131,7 +133,6 @@ static void dumpWord(const unsigned short* word, const int length) {
#define DEBUG_CORRECTION_FREQ false
#define DEBUG_WORDS_PRIORITY_QUEUE false

#define DUMP_WORD(word, length)

#endif // FLAG_DBG

@@ -207,7 +208,8 @@ static void dumpWord(const unsigned short* word, const int length) {

// Word limit for sub queues used in WordsPriorityQueuePool.  Sub queues are temporary queues used
// for better performance.
#define SUB_QUEUE_MAX_WORDS 5
// Holds up to 1 candidate for each word
#define SUB_QUEUE_MAX_WORDS 1
#define SUB_QUEUE_MAX_COUNT 10

#define MAX_DEPTH_MULTIPLIER 3
+41 −27
Original line number Diff line number Diff line
@@ -186,7 +186,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,

    PROF_OPEN;
    PROF_START(0);
    // Note: This line is intentionally left blank
    queuePool->clearAll();
    PROF_END(0);

    PROF_START(1);
@@ -241,18 +241,17 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo,
        }
    }
    PROF_END(6);
    if (DEBUG_WORDS_PRIORITY_QUEUE) {
        queuePool->dumpSubQueue1TopSuggestions();
    }
}

void UnigramDictionary::initSuggestions(ProximityInfo *proximityInfo, const int *xCoordinates,
        const int *yCoordinates, const int *codes, const int inputLength,
        WordsPriorityQueue *queue, Correction *correction) {
        const int *yCoordinates, const int *codes, const int inputLength, Correction *correction) {
    if (DEBUG_DICT) {
        AKLOGI("initSuggest");
    }
    proximityInfo->setInputParams(codes, inputLength, xCoordinates, yCoordinates);
    if (queue) {
        queue->clear();
    }
    const int maxDepth = min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
    correction->initCorrection(proximityInfo, inputLength, maxDepth);
}
@@ -264,15 +263,13 @@ void UnigramDictionary::getOneWordSuggestions(ProximityInfo *proximityInfo,
        const int *xcoordinates, const int *ycoordinates, const int *codes,
        const bool useFullEditDistance, const int inputLength, Correction *correction,
        WordsPriorityQueuePool *queuePool) {
    WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
    initSuggestions(
            proximityInfo, xcoordinates, ycoordinates, codes, inputLength, masterQueue, correction);
    getSuggestionCandidates(useFullEditDistance, inputLength, correction, masterQueue,
    initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, correction);
    getSuggestionCandidates(useFullEditDistance, inputLength, correction, queuePool,
            true /* doAutoCompletion */, DEFAULT_MAX_ERRORS);
}

void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance,
        const int inputLength, Correction *correction, WordsPriorityQueue *queue,
        const int inputLength, Correction *correction, WordsPriorityQueuePool *queuePool,
        const bool doAutoCompletion, const int maxErrors) {
    // TODO: Remove setCorrectionParams
    correction->setCorrectionParams(0, 0, 0,
@@ -292,7 +289,7 @@ void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance,
            int firstChildPos;

            const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos,
                    correction, &childCount, &firstChildPos, &siblingPos, queue);
                    correction, &childCount, &firstChildPos, &siblingPos, queuePool);
            // Update next sibling pos
            correction->setTreeSiblingPos(outputIndex, siblingPos);

@@ -327,14 +324,34 @@ void UnigramDictionary::getMistypedSpaceWords(ProximityInfo *proximityInfo, cons

inline void UnigramDictionary::onTerminal(const int freq,
        const TerminalAttributes& terminalAttributes, Correction *correction,
        WordsPriorityQueue *queue) {
        WordsPriorityQueuePool *queuePool, const bool addToMasterQueue) {
    const int inputIndex = correction->getInputIndex();
    const bool addToSubQueue = inputIndex < SUB_QUEUE_MAX_COUNT;
    if (!addToMasterQueue && !addToSubQueue) {
        return;
    }
    WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
    WordsPriorityQueue *subQueue = queuePool->getSubQueue1(inputIndex);
    int wordLength;
    unsigned short* wordPointer;
    const int finalFreq = correction->getFinalFreq(freq, &wordPointer, &wordLength);
    if (finalFreq >= 0) {
        if (!terminalAttributes.isShortcutOnly()) {
            addWord(wordPointer, wordLength, finalFreq, queue);
            if (addToMasterQueue) {
                addWord(wordPointer, wordLength, finalFreq, masterQueue);
            }
            // TODO: Check the validity of "inputIndex == wordLength"
            //if (addToSubQueue && inputIndex == wordLength) {
            if (addToSubQueue) {
                addWord(wordPointer, wordLength, finalFreq, subQueue);
            }
        }
        // Please note that the shortcut candidates will be added to the master queue only.
        if (!addToMasterQueue) {
            return;
        }

        // From here, below is the code to add shortcut candidates.
        TerminalAttributes::ShortcutIterator iterator = terminalAttributes.getShortcutIterator();
        while (iterator.hasNextShortcutTarget()) {
            // TODO: addWord only supports weak ordering, meaning we have no means to control the
@@ -345,7 +362,7 @@ inline void UnigramDictionary::onTerminal(const int freq,
            uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL];
            const int shortcutTargetStringLength = iterator.getNextShortcutTarget(
                    MAX_WORD_LENGTH_INTERNAL, shortcutTarget);
            addWord(shortcutTarget, shortcutTargetStringLength, finalFreq, queue);
            addWord(shortcutTarget, shortcutTargetStringLength, finalFreq, masterQueue);
        }
    }
}
@@ -411,8 +428,7 @@ void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo
    }

    // TODO: Remove initSuggestions and correction->setCorrectionParams
    initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength,
            0 /* do not clear queue */, correction);
    initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, correction);

    correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */,
            -1 /* transposedPos */, spaceProximityPos, missingSpacePos,
@@ -583,7 +599,7 @@ int UnigramDictionary::getBigramPosition(int pos, unsigned short *word, int offs
// given level, as output into newCount when traversing this level's parent.
inline bool UnigramDictionary::processCurrentNode(const int initialPos,
        Correction *correction, int *newCount,
        int *newChildrenPosition, int *nextSiblingPosition, WordsPriorityQueue *queue) {
        int *newChildrenPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool) {
    if (DEBUG_DICT) {
        correction->checkState();
    }
@@ -658,15 +674,13 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos,
    } while (NOT_A_CHARACTER != c);

    if (isTerminalNode) {
        if (needsToInvokeOnTerminal) {
        // The frequency should be here, because we come here only if this is actually
        // a terminal node, and we are on its last char.
        const int freq = BinaryFormat::readFrequencyWithoutMovingPointer(DICT_ROOT, pos);
        const int childrenAddressPos = BinaryFormat::skipFrequency(flags, pos);
        const int attributesPos = BinaryFormat::skipChildrenPosition(flags, childrenAddressPos);
        TerminalAttributes terminalAttributes(DICT_ROOT, flags, attributesPos);
            onTerminal(freq, terminalAttributes, correction, queue);
        }
        onTerminal(freq, terminalAttributes, correction, queuePool, needsToInvokeOnTerminal);

        // If there are more chars in this node, then this virtual node has children.
        // If we are on the last char, this virtual node has children if this node has.
+4 −5
Original line number Diff line number Diff line
@@ -93,14 +93,13 @@ class UnigramDictionary {
        const int codesRemain, const int currentDepth, int* codesDest, Correction *correction,
        WordsPriorityQueuePool* queuePool);
    void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
            const int *ycoordinates, const int *codes, const int codesSize,
            WordsPriorityQueue *queue, Correction *correction);
            const int *ycoordinates, const int *codes, const int codesSize, Correction *correction);
    void getOneWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
            const int *ycoordinates, const int *codes, const bool useFullEditDistance,
            const int inputLength, Correction *correction, WordsPriorityQueuePool* queuePool);
    void getSuggestionCandidates(
            const bool useFullEditDistance, const int inputLength, Correction *correction,
            WordsPriorityQueue* queue, const bool doAutoCompletion, const int maxErrors);
            WordsPriorityQueuePool* queuePool, const bool doAutoCompletion, const int maxErrors);
    void getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo,
            const int *xcoordinates, const int *ycoordinates, const int *codes,
            const bool useFullEditDistance, const int inputLength, const int spaceProximityPos,
@@ -114,12 +113,12 @@ class UnigramDictionary {
            const int inputLength, const int spaceProximityPos, Correction *correction,
            WordsPriorityQueuePool* queuePool);
    void onTerminal(const int freq, const TerminalAttributes& terminalAttributes,
            Correction *correction, WordsPriorityQueue *queue);
            Correction *correction, WordsPriorityQueuePool *queuePool, const bool addToMasterQueue);
    bool needsToSkipCurrentNode(const unsigned short c,
            const int inputIndex, const int skipPos, const int depth);
    // Process a node by considering proximity, missing and excessive character
    bool processCurrentNode(const int initialPos, Correction *correction, int *newCount,
            int *newChildPosition, int *nextSiblingPosition, WordsPriorityQueue *queue);
            int *newChildPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool);
    int getMostFrequentWordLike(const int startInputIndex, const int inputLength,
            ProximityInfo *proximityInfo, unsigned short *word);
    int getMostFrequentWordLikeInner(const uint16_t* const inWord, const int length,
Loading