Loading native/src/correction.cpp +15 −17 Original line number Original line Diff line number Diff line Loading @@ -214,21 +214,11 @@ int Correction::goDownTree( return mOutputIndex; return mOutputIndex; } } // TODO: remove int Correction::getOutputIndex() { return mOutputIndex; } // TODO: remove // TODO: remove int Correction::getInputIndex() { int Correction::getInputIndex() { return mInputIndex; return mInputIndex; } } // TODO: remove bool Correction::needsToTraverseAllNodes() { return mNeedsToTraverseAllNodes; } void Correction::incrementInputIndex() { void Correction::incrementInputIndex() { ++mInputIndex; ++mInputIndex; } } Loading Loading @@ -278,13 +268,12 @@ void Correction::addCharToCurrentWord(const int32_t c) { mWord, mOutputIndex + 1); mWord, mOutputIndex + 1); } } // TODO: inline? Correction::CorrectionType Correction::processSkipChar( Correction::CorrectionType Correction::processSkipChar( const int32_t c, const bool isTerminal, const bool inputIndexIncremented) { const int32_t c, const bool isTerminal, const bool inputIndexIncremented) { addCharToCurrentWord(c); addCharToCurrentWord(c); if (needsToTraverseAllNodes() && isTerminal) { mTerminalInputIndex = mInputIndex - (inputIndexIncremented ? 1 : 0); mTerminalInputIndex = mInputIndex - (inputIndexIncremented ? 1 : 0); mTerminalOutputIndex = mOutputIndex; mTerminalOutputIndex = mOutputIndex; if (mNeedsToTraverseAllNodes && isTerminal) { incrementOutputIndex(); incrementOutputIndex(); return TRAVERSE_ALL_ON_TERMINAL; return TRAVERSE_ALL_ON_TERMINAL; } else { } else { Loading @@ -293,6 +282,13 @@ Correction::CorrectionType Correction::processSkipChar( } } } } Correction::CorrectionType Correction::processUnrelatedCorrectionType() { // Needs to set mTerminalInputIndex and mTerminalOutputIndex before returning any CorrectionType mTerminalInputIndex = mInputIndex; mTerminalOutputIndex = mOutputIndex; return UNRELATED; } inline bool isEquivalentChar(ProximityInfo::ProximityType type) { inline bool isEquivalentChar(ProximityInfo::ProximityType type) { return type == ProximityInfo::EQUIVALENT_CHAR; return type == ProximityInfo::EQUIVALENT_CHAR; } } Loading @@ -301,7 +297,7 @@ Correction::CorrectionType Correction::processCharAndCalcState( const int32_t c, const bool isTerminal) { const int32_t c, const bool isTerminal) { const int correctionCount = (mSkippedCount + mExcessiveCount + mTransposedCount); const int correctionCount = (mSkippedCount + mExcessiveCount + mTransposedCount); if (correctionCount > mMaxErrors) { if (correctionCount > mMaxErrors) { return UNRELATED; return processUnrelatedCorrectionType(); } } // TODO: Change the limit if we'll allow two or more corrections // TODO: Change the limit if we'll allow two or more corrections Loading Loading @@ -381,7 +377,7 @@ Correction::CorrectionType Correction::processCharAndCalcState( AKLOGI("UNRELATED(0): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount, AKLOGI("UNRELATED(0): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount, mTransposedCount, mExcessiveCount, c); mTransposedCount, mExcessiveCount, c); } } return UNRELATED; return processUnrelatedCorrectionType(); } } } } Loading Loading @@ -484,7 +480,7 @@ Correction::CorrectionType Correction::processCharAndCalcState( AKLOGI("UNRELATED(1): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount, AKLOGI("UNRELATED(1): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount, mTransposedCount, mExcessiveCount, c); mTransposedCount, mExcessiveCount, c); } } return UNRELATED; return processUnrelatedCorrectionType(); } } } else if (secondTransposing) { } else if (secondTransposing) { // If inputIndex is greater than mInputLength, that means there is no // If inputIndex is greater than mInputLength, that means there is no Loading Loading @@ -539,6 +535,8 @@ Correction::CorrectionType Correction::processCharAndCalcState( } } return ON_TERMINAL; return ON_TERMINAL; } else { } else { mTerminalInputIndex = mInputIndex - 1; mTerminalOutputIndex = mOutputIndex - 1; return NOT_ON_TERMINAL; return NOT_ON_TERMINAL; } } } } Loading native/src/correction.h +1 −2 Original line number Original line Diff line number Diff line Loading @@ -48,7 +48,6 @@ class Correction { void checkState(); void checkState(); bool initProcessState(const int index); bool initProcessState(const int index); int getOutputIndex(); int getInputIndex(); int getInputIndex(); virtual ~Correction(); virtual ~Correction(); Loading Loading @@ -115,11 +114,11 @@ class Correction { private: private: inline void incrementInputIndex(); inline void incrementInputIndex(); inline void incrementOutputIndex(); inline void incrementOutputIndex(); inline bool needsToTraverseAllNodes(); inline void startToTraverseAllNodes(); inline void startToTraverseAllNodes(); inline bool isQuote(const unsigned short c); inline bool isQuote(const unsigned short c); inline CorrectionType processSkipChar( inline CorrectionType processSkipChar( const int32_t c, const bool isTerminal, const bool inputIndexIncremented); const int32_t c, const bool isTerminal, const bool inputIndexIncremented); inline CorrectionType processUnrelatedCorrectionType(); inline void addCharToCurrentWord(const int32_t c); inline void addCharToCurrentWord(const int32_t c); const int TYPED_LETTER_MULTIPLIER; const int TYPED_LETTER_MULTIPLIER; Loading native/src/defines.h +16 −14 Original line number Original line Diff line number Diff line Loading @@ -22,9 +22,23 @@ #include <cutils/log.h> #include <cutils/log.h> #define AKLOGE ALOGE #define AKLOGE ALOGE #define AKLOGI ALOGI #define AKLOGI ALOGI #define DUMP_WORD(word, length) do { dumpWord(word, length); } while(0) static char charBuf[50]; static void dumpWord(const unsigned short* word, const int length) { for (int i = 0; i < length; ++i) { charBuf[i] = word[i]; } charBuf[length] = 0; AKLOGI("[ %s ]", charBuf); } #else #else #define AKLOGE(fmt, ...) #define AKLOGE(fmt, ...) #define AKLOGI(fmt, ...) #define AKLOGI(fmt, ...) #define DUMP_WORD(word, length) #endif #endif #ifdef FLAG_DO_PROFILE #ifdef FLAG_DO_PROFILE Loading Loading @@ -106,18 +120,6 @@ static void prof_out(void) { #define DEBUG_CORRECTION_FREQ true #define DEBUG_CORRECTION_FREQ true #define DEBUG_WORDS_PRIORITY_QUEUE true #define DEBUG_WORDS_PRIORITY_QUEUE true #define DUMP_WORD(word, length) do { dumpWord(word, length); } while(0) static char charBuf[50]; static void dumpWord(const unsigned short* word, const int length) { for (int i = 0; i < length; ++i) { charBuf[i] = word[i]; } charBuf[length] = 0; AKLOGI("[ %s ]", charBuf); } #else // FLAG_DBG #else // FLAG_DBG #define DEBUG_DICT false #define DEBUG_DICT false Loading @@ -131,7 +133,6 @@ static void dumpWord(const unsigned short* word, const int length) { #define DEBUG_CORRECTION_FREQ false #define DEBUG_CORRECTION_FREQ false #define DEBUG_WORDS_PRIORITY_QUEUE false #define DEBUG_WORDS_PRIORITY_QUEUE false #define DUMP_WORD(word, length) #endif // FLAG_DBG #endif // FLAG_DBG Loading Loading @@ -207,7 +208,8 @@ static void dumpWord(const unsigned short* word, const int length) { // Word limit for sub queues used in WordsPriorityQueuePool. Sub queues are temporary queues used // Word limit for sub queues used in WordsPriorityQueuePool. Sub queues are temporary queues used // for better performance. // for better performance. #define SUB_QUEUE_MAX_WORDS 5 // Holds up to 1 candidate for each word #define SUB_QUEUE_MAX_WORDS 1 #define SUB_QUEUE_MAX_COUNT 10 #define SUB_QUEUE_MAX_COUNT 10 #define MAX_DEPTH_MULTIPLIER 3 #define MAX_DEPTH_MULTIPLIER 3 Loading native/src/unigram_dictionary.cpp +41 −27 Original line number Original line Diff line number Diff line Loading @@ -186,7 +186,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, PROF_OPEN; PROF_OPEN; PROF_START(0); PROF_START(0); // Note: This line is intentionally left blank queuePool->clearAll(); PROF_END(0); PROF_END(0); PROF_START(1); PROF_START(1); Loading Loading @@ -241,18 +241,17 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, } } } } PROF_END(6); PROF_END(6); if (DEBUG_WORDS_PRIORITY_QUEUE) { queuePool->dumpSubQueue1TopSuggestions(); } } } void UnigramDictionary::initSuggestions(ProximityInfo *proximityInfo, const int *xCoordinates, void UnigramDictionary::initSuggestions(ProximityInfo *proximityInfo, const int *xCoordinates, const int *yCoordinates, const int *codes, const int inputLength, const int *yCoordinates, const int *codes, const int inputLength, Correction *correction) { WordsPriorityQueue *queue, Correction *correction) { if (DEBUG_DICT) { if (DEBUG_DICT) { AKLOGI("initSuggest"); AKLOGI("initSuggest"); } } proximityInfo->setInputParams(codes, inputLength, xCoordinates, yCoordinates); proximityInfo->setInputParams(codes, inputLength, xCoordinates, yCoordinates); if (queue) { queue->clear(); } const int maxDepth = min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH); const int maxDepth = min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH); correction->initCorrection(proximityInfo, inputLength, maxDepth); correction->initCorrection(proximityInfo, inputLength, maxDepth); } } Loading @@ -264,15 +263,13 @@ void UnigramDictionary::getOneWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const int *xcoordinates, const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int inputLength, Correction *correction, const bool useFullEditDistance, const int inputLength, Correction *correction, WordsPriorityQueuePool *queuePool) { WordsPriorityQueuePool *queuePool) { WordsPriorityQueue *masterQueue = queuePool->getMasterQueue(); initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, correction); initSuggestions( getSuggestionCandidates(useFullEditDistance, inputLength, correction, queuePool, proximityInfo, xcoordinates, ycoordinates, codes, inputLength, masterQueue, correction); getSuggestionCandidates(useFullEditDistance, inputLength, correction, masterQueue, true /* doAutoCompletion */, DEFAULT_MAX_ERRORS); true /* doAutoCompletion */, DEFAULT_MAX_ERRORS); } } void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance, void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance, const int inputLength, Correction *correction, WordsPriorityQueue *queue, const int inputLength, Correction *correction, WordsPriorityQueuePool *queuePool, const bool doAutoCompletion, const int maxErrors) { const bool doAutoCompletion, const int maxErrors) { // TODO: Remove setCorrectionParams // TODO: Remove setCorrectionParams correction->setCorrectionParams(0, 0, 0, correction->setCorrectionParams(0, 0, 0, Loading @@ -292,7 +289,7 @@ void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance, int firstChildPos; int firstChildPos; const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, correction, &childCount, &firstChildPos, &siblingPos, queue); correction, &childCount, &firstChildPos, &siblingPos, queuePool); // Update next sibling pos // Update next sibling pos correction->setTreeSiblingPos(outputIndex, siblingPos); correction->setTreeSiblingPos(outputIndex, siblingPos); Loading Loading @@ -327,14 +324,34 @@ void UnigramDictionary::getMistypedSpaceWords(ProximityInfo *proximityInfo, cons inline void UnigramDictionary::onTerminal(const int freq, inline void UnigramDictionary::onTerminal(const int freq, const TerminalAttributes& terminalAttributes, Correction *correction, const TerminalAttributes& terminalAttributes, Correction *correction, WordsPriorityQueue *queue) { WordsPriorityQueuePool *queuePool, const bool addToMasterQueue) { const int inputIndex = correction->getInputIndex(); const bool addToSubQueue = inputIndex < SUB_QUEUE_MAX_COUNT; if (!addToMasterQueue && !addToSubQueue) { return; } WordsPriorityQueue *masterQueue = queuePool->getMasterQueue(); WordsPriorityQueue *subQueue = queuePool->getSubQueue1(inputIndex); int wordLength; int wordLength; unsigned short* wordPointer; unsigned short* wordPointer; const int finalFreq = correction->getFinalFreq(freq, &wordPointer, &wordLength); const int finalFreq = correction->getFinalFreq(freq, &wordPointer, &wordLength); if (finalFreq >= 0) { if (finalFreq >= 0) { if (!terminalAttributes.isShortcutOnly()) { if (!terminalAttributes.isShortcutOnly()) { addWord(wordPointer, wordLength, finalFreq, queue); if (addToMasterQueue) { addWord(wordPointer, wordLength, finalFreq, masterQueue); } } // TODO: Check the validity of "inputIndex == wordLength" //if (addToSubQueue && inputIndex == wordLength) { if (addToSubQueue) { addWord(wordPointer, wordLength, finalFreq, subQueue); } } // Please note that the shortcut candidates will be added to the master queue only. if (!addToMasterQueue) { return; } // From here, below is the code to add shortcut candidates. TerminalAttributes::ShortcutIterator iterator = terminalAttributes.getShortcutIterator(); TerminalAttributes::ShortcutIterator iterator = terminalAttributes.getShortcutIterator(); while (iterator.hasNextShortcutTarget()) { while (iterator.hasNextShortcutTarget()) { // TODO: addWord only supports weak ordering, meaning we have no means to control the // TODO: addWord only supports weak ordering, meaning we have no means to control the Loading @@ -345,7 +362,7 @@ inline void UnigramDictionary::onTerminal(const int freq, uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL]; uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL]; const int shortcutTargetStringLength = iterator.getNextShortcutTarget( const int shortcutTargetStringLength = iterator.getNextShortcutTarget( MAX_WORD_LENGTH_INTERNAL, shortcutTarget); MAX_WORD_LENGTH_INTERNAL, shortcutTarget); addWord(shortcutTarget, shortcutTargetStringLength, finalFreq, queue); addWord(shortcutTarget, shortcutTargetStringLength, finalFreq, masterQueue); } } } } } } Loading Loading @@ -411,8 +428,7 @@ void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo } } // TODO: Remove initSuggestions and correction->setCorrectionParams // TODO: Remove initSuggestions and correction->setCorrectionParams initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, correction); 0 /* do not clear queue */, correction); correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */, correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */, -1 /* transposedPos */, spaceProximityPos, missingSpacePos, -1 /* transposedPos */, spaceProximityPos, missingSpacePos, Loading Loading @@ -584,7 +600,7 @@ int UnigramDictionary::getBigramPosition(int pos, unsigned short *word, int offs // given level, as output into newCount when traversing this level's parent. // given level, as output into newCount when traversing this level's parent. inline bool UnigramDictionary::processCurrentNode(const int initialPos, inline bool UnigramDictionary::processCurrentNode(const int initialPos, Correction *correction, int *newCount, Correction *correction, int *newCount, int *newChildrenPosition, int *nextSiblingPosition, WordsPriorityQueue *queue) { int *newChildrenPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool) { if (DEBUG_DICT) { if (DEBUG_DICT) { correction->checkState(); correction->checkState(); } } Loading Loading @@ -659,15 +675,13 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, } while (NOT_A_CHARACTER != c); } while (NOT_A_CHARACTER != c); if (isTerminalNode) { if (isTerminalNode) { if (needsToInvokeOnTerminal) { // The frequency should be here, because we come here only if this is actually // The frequency should be here, because we come here only if this is actually // a terminal node, and we are on its last char. // a terminal node, and we are on its last char. const int freq = BinaryFormat::readFrequencyWithoutMovingPointer(DICT_ROOT, pos); const int freq = BinaryFormat::readFrequencyWithoutMovingPointer(DICT_ROOT, pos); const int childrenAddressPos = BinaryFormat::skipFrequency(flags, pos); const int childrenAddressPos = BinaryFormat::skipFrequency(flags, pos); const int attributesPos = BinaryFormat::skipChildrenPosition(flags, childrenAddressPos); const int attributesPos = BinaryFormat::skipChildrenPosition(flags, childrenAddressPos); TerminalAttributes terminalAttributes(DICT_ROOT, flags, attributesPos); TerminalAttributes terminalAttributes(DICT_ROOT, flags, attributesPos); onTerminal(freq, terminalAttributes, correction, queue); onTerminal(freq, terminalAttributes, correction, queuePool, needsToInvokeOnTerminal); } // If there are more chars in this node, then this virtual node has children. // If there are more chars in this node, then this virtual node has children. // If we are on the last char, this virtual node has children if this node has. // If we are on the last char, this virtual node has children if this node has. Loading native/src/unigram_dictionary.h +4 −5 Original line number Original line Diff line number Diff line Loading @@ -93,14 +93,13 @@ class UnigramDictionary { const int codesRemain, const int currentDepth, int* codesDest, Correction *correction, const int codesRemain, const int currentDepth, int* codesDest, Correction *correction, WordsPriorityQueuePool* queuePool); WordsPriorityQueuePool* queuePool); void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const int codesSize, const int *ycoordinates, const int *codes, const int codesSize, Correction *correction); WordsPriorityQueue *queue, Correction *correction); void getOneWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, void getOneWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int inputLength, Correction *correction, WordsPriorityQueuePool* queuePool); const int inputLength, Correction *correction, WordsPriorityQueuePool* queuePool); void getSuggestionCandidates( void getSuggestionCandidates( const bool useFullEditDistance, const int inputLength, Correction *correction, const bool useFullEditDistance, const int inputLength, Correction *correction, WordsPriorityQueue* queue, const bool doAutoCompletion, const int maxErrors); WordsPriorityQueuePool* queuePool, const bool doAutoCompletion, const int maxErrors); void getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo, void getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const int *xcoordinates, const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int inputLength, const int spaceProximityPos, const bool useFullEditDistance, const int inputLength, const int spaceProximityPos, Loading @@ -114,12 +113,12 @@ class UnigramDictionary { const int inputLength, const int spaceProximityPos, Correction *correction, const int inputLength, const int spaceProximityPos, Correction *correction, WordsPriorityQueuePool* queuePool); WordsPriorityQueuePool* queuePool); void onTerminal(const int freq, const TerminalAttributes& terminalAttributes, void onTerminal(const int freq, const TerminalAttributes& terminalAttributes, Correction *correction, WordsPriorityQueue *queue); Correction *correction, WordsPriorityQueuePool *queuePool, const bool addToMasterQueue); bool needsToSkipCurrentNode(const unsigned short c, bool needsToSkipCurrentNode(const unsigned short c, const int inputIndex, const int skipPos, const int depth); const int inputIndex, const int skipPos, const int depth); // Process a node by considering proximity, missing and excessive character // Process a node by considering proximity, missing and excessive character bool processCurrentNode(const int initialPos, Correction *correction, int *newCount, bool processCurrentNode(const int initialPos, Correction *correction, int *newCount, int *newChildPosition, int *nextSiblingPosition, WordsPriorityQueue *queue); int *newChildPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool); int getMostFrequentWordLike(const int startInputIndex, const int inputLength, int getMostFrequentWordLike(const int startInputIndex, const int inputLength, ProximityInfo *proximityInfo, unsigned short *word); ProximityInfo *proximityInfo, unsigned short *word); int getMostFrequentWordLikeInner(const uint16_t* const inWord, const int length, int getMostFrequentWordLikeInner(const uint16_t* const inWord, const int length, Loading Loading
native/src/correction.cpp +15 −17 Original line number Original line Diff line number Diff line Loading @@ -214,21 +214,11 @@ int Correction::goDownTree( return mOutputIndex; return mOutputIndex; } } // TODO: remove int Correction::getOutputIndex() { return mOutputIndex; } // TODO: remove // TODO: remove int Correction::getInputIndex() { int Correction::getInputIndex() { return mInputIndex; return mInputIndex; } } // TODO: remove bool Correction::needsToTraverseAllNodes() { return mNeedsToTraverseAllNodes; } void Correction::incrementInputIndex() { void Correction::incrementInputIndex() { ++mInputIndex; ++mInputIndex; } } Loading Loading @@ -278,13 +268,12 @@ void Correction::addCharToCurrentWord(const int32_t c) { mWord, mOutputIndex + 1); mWord, mOutputIndex + 1); } } // TODO: inline? Correction::CorrectionType Correction::processSkipChar( Correction::CorrectionType Correction::processSkipChar( const int32_t c, const bool isTerminal, const bool inputIndexIncremented) { const int32_t c, const bool isTerminal, const bool inputIndexIncremented) { addCharToCurrentWord(c); addCharToCurrentWord(c); if (needsToTraverseAllNodes() && isTerminal) { mTerminalInputIndex = mInputIndex - (inputIndexIncremented ? 1 : 0); mTerminalInputIndex = mInputIndex - (inputIndexIncremented ? 1 : 0); mTerminalOutputIndex = mOutputIndex; mTerminalOutputIndex = mOutputIndex; if (mNeedsToTraverseAllNodes && isTerminal) { incrementOutputIndex(); incrementOutputIndex(); return TRAVERSE_ALL_ON_TERMINAL; return TRAVERSE_ALL_ON_TERMINAL; } else { } else { Loading @@ -293,6 +282,13 @@ Correction::CorrectionType Correction::processSkipChar( } } } } Correction::CorrectionType Correction::processUnrelatedCorrectionType() { // Needs to set mTerminalInputIndex and mTerminalOutputIndex before returning any CorrectionType mTerminalInputIndex = mInputIndex; mTerminalOutputIndex = mOutputIndex; return UNRELATED; } inline bool isEquivalentChar(ProximityInfo::ProximityType type) { inline bool isEquivalentChar(ProximityInfo::ProximityType type) { return type == ProximityInfo::EQUIVALENT_CHAR; return type == ProximityInfo::EQUIVALENT_CHAR; } } Loading @@ -301,7 +297,7 @@ Correction::CorrectionType Correction::processCharAndCalcState( const int32_t c, const bool isTerminal) { const int32_t c, const bool isTerminal) { const int correctionCount = (mSkippedCount + mExcessiveCount + mTransposedCount); const int correctionCount = (mSkippedCount + mExcessiveCount + mTransposedCount); if (correctionCount > mMaxErrors) { if (correctionCount > mMaxErrors) { return UNRELATED; return processUnrelatedCorrectionType(); } } // TODO: Change the limit if we'll allow two or more corrections // TODO: Change the limit if we'll allow two or more corrections Loading Loading @@ -381,7 +377,7 @@ Correction::CorrectionType Correction::processCharAndCalcState( AKLOGI("UNRELATED(0): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount, AKLOGI("UNRELATED(0): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount, mTransposedCount, mExcessiveCount, c); mTransposedCount, mExcessiveCount, c); } } return UNRELATED; return processUnrelatedCorrectionType(); } } } } Loading Loading @@ -484,7 +480,7 @@ Correction::CorrectionType Correction::processCharAndCalcState( AKLOGI("UNRELATED(1): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount, AKLOGI("UNRELATED(1): %d, %d, %d, %d, %c", mProximityCount, mSkippedCount, mTransposedCount, mExcessiveCount, c); mTransposedCount, mExcessiveCount, c); } } return UNRELATED; return processUnrelatedCorrectionType(); } } } else if (secondTransposing) { } else if (secondTransposing) { // If inputIndex is greater than mInputLength, that means there is no // If inputIndex is greater than mInputLength, that means there is no Loading Loading @@ -539,6 +535,8 @@ Correction::CorrectionType Correction::processCharAndCalcState( } } return ON_TERMINAL; return ON_TERMINAL; } else { } else { mTerminalInputIndex = mInputIndex - 1; mTerminalOutputIndex = mOutputIndex - 1; return NOT_ON_TERMINAL; return NOT_ON_TERMINAL; } } } } Loading
native/src/correction.h +1 −2 Original line number Original line Diff line number Diff line Loading @@ -48,7 +48,6 @@ class Correction { void checkState(); void checkState(); bool initProcessState(const int index); bool initProcessState(const int index); int getOutputIndex(); int getInputIndex(); int getInputIndex(); virtual ~Correction(); virtual ~Correction(); Loading Loading @@ -115,11 +114,11 @@ class Correction { private: private: inline void incrementInputIndex(); inline void incrementInputIndex(); inline void incrementOutputIndex(); inline void incrementOutputIndex(); inline bool needsToTraverseAllNodes(); inline void startToTraverseAllNodes(); inline void startToTraverseAllNodes(); inline bool isQuote(const unsigned short c); inline bool isQuote(const unsigned short c); inline CorrectionType processSkipChar( inline CorrectionType processSkipChar( const int32_t c, const bool isTerminal, const bool inputIndexIncremented); const int32_t c, const bool isTerminal, const bool inputIndexIncremented); inline CorrectionType processUnrelatedCorrectionType(); inline void addCharToCurrentWord(const int32_t c); inline void addCharToCurrentWord(const int32_t c); const int TYPED_LETTER_MULTIPLIER; const int TYPED_LETTER_MULTIPLIER; Loading
native/src/defines.h +16 −14 Original line number Original line Diff line number Diff line Loading @@ -22,9 +22,23 @@ #include <cutils/log.h> #include <cutils/log.h> #define AKLOGE ALOGE #define AKLOGE ALOGE #define AKLOGI ALOGI #define AKLOGI ALOGI #define DUMP_WORD(word, length) do { dumpWord(word, length); } while(0) static char charBuf[50]; static void dumpWord(const unsigned short* word, const int length) { for (int i = 0; i < length; ++i) { charBuf[i] = word[i]; } charBuf[length] = 0; AKLOGI("[ %s ]", charBuf); } #else #else #define AKLOGE(fmt, ...) #define AKLOGE(fmt, ...) #define AKLOGI(fmt, ...) #define AKLOGI(fmt, ...) #define DUMP_WORD(word, length) #endif #endif #ifdef FLAG_DO_PROFILE #ifdef FLAG_DO_PROFILE Loading Loading @@ -106,18 +120,6 @@ static void prof_out(void) { #define DEBUG_CORRECTION_FREQ true #define DEBUG_CORRECTION_FREQ true #define DEBUG_WORDS_PRIORITY_QUEUE true #define DEBUG_WORDS_PRIORITY_QUEUE true #define DUMP_WORD(word, length) do { dumpWord(word, length); } while(0) static char charBuf[50]; static void dumpWord(const unsigned short* word, const int length) { for (int i = 0; i < length; ++i) { charBuf[i] = word[i]; } charBuf[length] = 0; AKLOGI("[ %s ]", charBuf); } #else // FLAG_DBG #else // FLAG_DBG #define DEBUG_DICT false #define DEBUG_DICT false Loading @@ -131,7 +133,6 @@ static void dumpWord(const unsigned short* word, const int length) { #define DEBUG_CORRECTION_FREQ false #define DEBUG_CORRECTION_FREQ false #define DEBUG_WORDS_PRIORITY_QUEUE false #define DEBUG_WORDS_PRIORITY_QUEUE false #define DUMP_WORD(word, length) #endif // FLAG_DBG #endif // FLAG_DBG Loading Loading @@ -207,7 +208,8 @@ static void dumpWord(const unsigned short* word, const int length) { // Word limit for sub queues used in WordsPriorityQueuePool. Sub queues are temporary queues used // Word limit for sub queues used in WordsPriorityQueuePool. Sub queues are temporary queues used // for better performance. // for better performance. #define SUB_QUEUE_MAX_WORDS 5 // Holds up to 1 candidate for each word #define SUB_QUEUE_MAX_WORDS 1 #define SUB_QUEUE_MAX_COUNT 10 #define SUB_QUEUE_MAX_COUNT 10 #define MAX_DEPTH_MULTIPLIER 3 #define MAX_DEPTH_MULTIPLIER 3 Loading
native/src/unigram_dictionary.cpp +41 −27 Original line number Original line Diff line number Diff line Loading @@ -186,7 +186,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, PROF_OPEN; PROF_OPEN; PROF_START(0); PROF_START(0); // Note: This line is intentionally left blank queuePool->clearAll(); PROF_END(0); PROF_END(0); PROF_START(1); PROF_START(1); Loading Loading @@ -241,18 +241,17 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, } } } } PROF_END(6); PROF_END(6); if (DEBUG_WORDS_PRIORITY_QUEUE) { queuePool->dumpSubQueue1TopSuggestions(); } } } void UnigramDictionary::initSuggestions(ProximityInfo *proximityInfo, const int *xCoordinates, void UnigramDictionary::initSuggestions(ProximityInfo *proximityInfo, const int *xCoordinates, const int *yCoordinates, const int *codes, const int inputLength, const int *yCoordinates, const int *codes, const int inputLength, Correction *correction) { WordsPriorityQueue *queue, Correction *correction) { if (DEBUG_DICT) { if (DEBUG_DICT) { AKLOGI("initSuggest"); AKLOGI("initSuggest"); } } proximityInfo->setInputParams(codes, inputLength, xCoordinates, yCoordinates); proximityInfo->setInputParams(codes, inputLength, xCoordinates, yCoordinates); if (queue) { queue->clear(); } const int maxDepth = min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH); const int maxDepth = min(inputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH); correction->initCorrection(proximityInfo, inputLength, maxDepth); correction->initCorrection(proximityInfo, inputLength, maxDepth); } } Loading @@ -264,15 +263,13 @@ void UnigramDictionary::getOneWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const int *xcoordinates, const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int inputLength, Correction *correction, const bool useFullEditDistance, const int inputLength, Correction *correction, WordsPriorityQueuePool *queuePool) { WordsPriorityQueuePool *queuePool) { WordsPriorityQueue *masterQueue = queuePool->getMasterQueue(); initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, correction); initSuggestions( getSuggestionCandidates(useFullEditDistance, inputLength, correction, queuePool, proximityInfo, xcoordinates, ycoordinates, codes, inputLength, masterQueue, correction); getSuggestionCandidates(useFullEditDistance, inputLength, correction, masterQueue, true /* doAutoCompletion */, DEFAULT_MAX_ERRORS); true /* doAutoCompletion */, DEFAULT_MAX_ERRORS); } } void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance, void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance, const int inputLength, Correction *correction, WordsPriorityQueue *queue, const int inputLength, Correction *correction, WordsPriorityQueuePool *queuePool, const bool doAutoCompletion, const int maxErrors) { const bool doAutoCompletion, const int maxErrors) { // TODO: Remove setCorrectionParams // TODO: Remove setCorrectionParams correction->setCorrectionParams(0, 0, 0, correction->setCorrectionParams(0, 0, 0, Loading @@ -292,7 +289,7 @@ void UnigramDictionary::getSuggestionCandidates(const bool useFullEditDistance, int firstChildPos; int firstChildPos; const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, const bool needsToTraverseChildrenNodes = processCurrentNode(siblingPos, correction, &childCount, &firstChildPos, &siblingPos, queue); correction, &childCount, &firstChildPos, &siblingPos, queuePool); // Update next sibling pos // Update next sibling pos correction->setTreeSiblingPos(outputIndex, siblingPos); correction->setTreeSiblingPos(outputIndex, siblingPos); Loading Loading @@ -327,14 +324,34 @@ void UnigramDictionary::getMistypedSpaceWords(ProximityInfo *proximityInfo, cons inline void UnigramDictionary::onTerminal(const int freq, inline void UnigramDictionary::onTerminal(const int freq, const TerminalAttributes& terminalAttributes, Correction *correction, const TerminalAttributes& terminalAttributes, Correction *correction, WordsPriorityQueue *queue) { WordsPriorityQueuePool *queuePool, const bool addToMasterQueue) { const int inputIndex = correction->getInputIndex(); const bool addToSubQueue = inputIndex < SUB_QUEUE_MAX_COUNT; if (!addToMasterQueue && !addToSubQueue) { return; } WordsPriorityQueue *masterQueue = queuePool->getMasterQueue(); WordsPriorityQueue *subQueue = queuePool->getSubQueue1(inputIndex); int wordLength; int wordLength; unsigned short* wordPointer; unsigned short* wordPointer; const int finalFreq = correction->getFinalFreq(freq, &wordPointer, &wordLength); const int finalFreq = correction->getFinalFreq(freq, &wordPointer, &wordLength); if (finalFreq >= 0) { if (finalFreq >= 0) { if (!terminalAttributes.isShortcutOnly()) { if (!terminalAttributes.isShortcutOnly()) { addWord(wordPointer, wordLength, finalFreq, queue); if (addToMasterQueue) { addWord(wordPointer, wordLength, finalFreq, masterQueue); } } // TODO: Check the validity of "inputIndex == wordLength" //if (addToSubQueue && inputIndex == wordLength) { if (addToSubQueue) { addWord(wordPointer, wordLength, finalFreq, subQueue); } } // Please note that the shortcut candidates will be added to the master queue only. if (!addToMasterQueue) { return; } // From here, below is the code to add shortcut candidates. TerminalAttributes::ShortcutIterator iterator = terminalAttributes.getShortcutIterator(); TerminalAttributes::ShortcutIterator iterator = terminalAttributes.getShortcutIterator(); while (iterator.hasNextShortcutTarget()) { while (iterator.hasNextShortcutTarget()) { // TODO: addWord only supports weak ordering, meaning we have no means to control the // TODO: addWord only supports weak ordering, meaning we have no means to control the Loading @@ -345,7 +362,7 @@ inline void UnigramDictionary::onTerminal(const int freq, uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL]; uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL]; const int shortcutTargetStringLength = iterator.getNextShortcutTarget( const int shortcutTargetStringLength = iterator.getNextShortcutTarget( MAX_WORD_LENGTH_INTERNAL, shortcutTarget); MAX_WORD_LENGTH_INTERNAL, shortcutTarget); addWord(shortcutTarget, shortcutTargetStringLength, finalFreq, queue); addWord(shortcutTarget, shortcutTargetStringLength, finalFreq, masterQueue); } } } } } } Loading Loading @@ -411,8 +428,7 @@ void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo } } // TODO: Remove initSuggestions and correction->setCorrectionParams // TODO: Remove initSuggestions and correction->setCorrectionParams initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, correction); 0 /* do not clear queue */, correction); correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */, correction->setCorrectionParams(-1 /* skipPos */, -1 /* excessivePos */, -1 /* transposedPos */, spaceProximityPos, missingSpacePos, -1 /* transposedPos */, spaceProximityPos, missingSpacePos, Loading Loading @@ -584,7 +600,7 @@ int UnigramDictionary::getBigramPosition(int pos, unsigned short *word, int offs // given level, as output into newCount when traversing this level's parent. // given level, as output into newCount when traversing this level's parent. inline bool UnigramDictionary::processCurrentNode(const int initialPos, inline bool UnigramDictionary::processCurrentNode(const int initialPos, Correction *correction, int *newCount, Correction *correction, int *newCount, int *newChildrenPosition, int *nextSiblingPosition, WordsPriorityQueue *queue) { int *newChildrenPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool) { if (DEBUG_DICT) { if (DEBUG_DICT) { correction->checkState(); correction->checkState(); } } Loading Loading @@ -659,15 +675,13 @@ inline bool UnigramDictionary::processCurrentNode(const int initialPos, } while (NOT_A_CHARACTER != c); } while (NOT_A_CHARACTER != c); if (isTerminalNode) { if (isTerminalNode) { if (needsToInvokeOnTerminal) { // The frequency should be here, because we come here only if this is actually // The frequency should be here, because we come here only if this is actually // a terminal node, and we are on its last char. // a terminal node, and we are on its last char. const int freq = BinaryFormat::readFrequencyWithoutMovingPointer(DICT_ROOT, pos); const int freq = BinaryFormat::readFrequencyWithoutMovingPointer(DICT_ROOT, pos); const int childrenAddressPos = BinaryFormat::skipFrequency(flags, pos); const int childrenAddressPos = BinaryFormat::skipFrequency(flags, pos); const int attributesPos = BinaryFormat::skipChildrenPosition(flags, childrenAddressPos); const int attributesPos = BinaryFormat::skipChildrenPosition(flags, childrenAddressPos); TerminalAttributes terminalAttributes(DICT_ROOT, flags, attributesPos); TerminalAttributes terminalAttributes(DICT_ROOT, flags, attributesPos); onTerminal(freq, terminalAttributes, correction, queue); onTerminal(freq, terminalAttributes, correction, queuePool, needsToInvokeOnTerminal); } // If there are more chars in this node, then this virtual node has children. // If there are more chars in this node, then this virtual node has children. // If we are on the last char, this virtual node has children if this node has. // If we are on the last char, this virtual node has children if this node has. Loading
native/src/unigram_dictionary.h +4 −5 Original line number Original line Diff line number Diff line Loading @@ -93,14 +93,13 @@ class UnigramDictionary { const int codesRemain, const int currentDepth, int* codesDest, Correction *correction, const int codesRemain, const int currentDepth, int* codesDest, Correction *correction, WordsPriorityQueuePool* queuePool); WordsPriorityQueuePool* queuePool); void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const int codesSize, const int *ycoordinates, const int *codes, const int codesSize, Correction *correction); WordsPriorityQueue *queue, Correction *correction); void getOneWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, void getOneWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int inputLength, Correction *correction, WordsPriorityQueuePool* queuePool); const int inputLength, Correction *correction, WordsPriorityQueuePool* queuePool); void getSuggestionCandidates( void getSuggestionCandidates( const bool useFullEditDistance, const int inputLength, Correction *correction, const bool useFullEditDistance, const int inputLength, Correction *correction, WordsPriorityQueue* queue, const bool doAutoCompletion, const int maxErrors); WordsPriorityQueuePool* queuePool, const bool doAutoCompletion, const int maxErrors); void getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo, void getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const int *xcoordinates, const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int inputLength, const int spaceProximityPos, const bool useFullEditDistance, const int inputLength, const int spaceProximityPos, Loading @@ -114,12 +113,12 @@ class UnigramDictionary { const int inputLength, const int spaceProximityPos, Correction *correction, const int inputLength, const int spaceProximityPos, Correction *correction, WordsPriorityQueuePool* queuePool); WordsPriorityQueuePool* queuePool); void onTerminal(const int freq, const TerminalAttributes& terminalAttributes, void onTerminal(const int freq, const TerminalAttributes& terminalAttributes, Correction *correction, WordsPriorityQueue *queue); Correction *correction, WordsPriorityQueuePool *queuePool, const bool addToMasterQueue); bool needsToSkipCurrentNode(const unsigned short c, bool needsToSkipCurrentNode(const unsigned short c, const int inputIndex, const int skipPos, const int depth); const int inputIndex, const int skipPos, const int depth); // Process a node by considering proximity, missing and excessive character // Process a node by considering proximity, missing and excessive character bool processCurrentNode(const int initialPos, Correction *correction, int *newCount, bool processCurrentNode(const int initialPos, Correction *correction, int *newCount, int *newChildPosition, int *nextSiblingPosition, WordsPriorityQueue *queue); int *newChildPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool); int getMostFrequentWordLike(const int startInputIndex, const int inputLength, int getMostFrequentWordLike(const int startInputIndex, const int inputLength, ProximityInfo *proximityInfo, unsigned short *word); ProximityInfo *proximityInfo, unsigned short *word); int getMostFrequentWordLikeInner(const uint16_t* const inWord, const int length, int getMostFrequentWordLikeInner(const uint16_t* const inWord, const int length, Loading