Loading native/src/correction.cpp +6 −5 Original line number Diff line number Diff line Loading @@ -827,11 +827,6 @@ int Correction::RankingAlgorithm::calcFreqForSplitTwoWords( const bool capitalizedWordDemotion = firstCapitalizedWordDemotion ^ secondCapitalizedWordDemotion; if (DEBUG_DICT_FULL) { AKLOGI("Two words: %c, %c, %d", word[0], word[firstWordLength + 1], capitalizedWordDemotion); } if (firstWordLength == 0 || secondWordLength == 0) { return 0; } Loading Loading @@ -891,6 +886,12 @@ int Correction::RankingAlgorithm::calcFreqForSplitTwoWords( multiplyRate(TWO_WORDS_CAPITALIZED_DEMOTION_RATE, &totalFreq); } if (DEBUG_CORRECTION_FREQ) { AKLOGI("Two words (%d, %d) (%d, %d) %d, %d", firstFreq, secondFreq, firstWordLength, secondWordLength, capitalizedWordDemotion, totalFreq); DUMP_WORD(word, firstWordLength); } return totalFreq; } Loading native/src/defines.h +3 −3 Original line number Diff line number Diff line Loading @@ -216,15 +216,15 @@ static void prof_out(void) { #define SUB_QUEUE_MAX_WORDS 1 #define SUB_QUEUE_MAX_COUNT 10 #define SUB_QUEUE_MIN_WORD_LENGTH 4 #define SUB_QUEUE_MAX_WORD_INDEX 2 #define MULTIPLE_WORDS_SUGGESTION_MAX_WORDS 2 #define TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD 0.39 #define START_TWO_WORDS_CORRECTION_THRESHOLD 0.22 #define MAX_DEPTH_MULTIPLIER 3 #define FIRST_WORD_INDEX 1 #define SECOND_WORD_INDEX 2 #define FIRST_WORD_INDEX 0 #define SECOND_WORD_INDEX 1 // TODO: Reduce this constant if possible; check the maximum number of umlauts in the same German // word in the dictionary Loading native/src/unigram_dictionary.cpp +101 −76 Original line number Diff line number Diff line Loading @@ -224,15 +224,10 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, // Multiple word suggestions if (SUGGEST_MULTIPLE_WORDS && inputLength >= MIN_USER_TYPED_LENGTH_FOR_MULTIPLE_WORD_SUGGESTION) { for (int i = 1; i < inputLength; ++i) { if (DEBUG_DICT) { AKLOGI("--- Suggest multiple words %d", i); } getSplitTwoWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, useFullEditDistance, inputLength, i, correction, queuePool, useFullEditDistance, inputLength, correction, queuePool, hasAutoCorrectionCandidate); } } PROF_END(5); PROF_START(6); Loading Loading @@ -329,7 +324,7 @@ inline void UnigramDictionary::onTerminal(const int freq, int wordLength; unsigned short* wordPointer; if ((currentWordIndex == 1) && addToMasterQueue) { if ((currentWordIndex == FIRST_WORD_INDEX) && addToMasterQueue) { WordsPriorityQueue *masterQueue = queuePool->getMasterQueue(); const int finalFreq = correction->getFinalFreq(freq, &wordPointer, &wordLength); if (finalFreq != NOT_A_FREQUENCY) { Loading Loading @@ -377,11 +372,8 @@ bool UnigramDictionary::getSubStringSuggestion( const int inputWordStartPos, const int inputWordLength, const int outputWordStartPos, const bool isSpaceProximity, int *freqArray, int*wordLengthArray, unsigned short* outputWord, int *outputWordLength) { if (DEBUG_DICT) { assert(currentWordIndex >= 1); } unsigned short* tempOutputWord = 0; int tempOutputWordLength = 0; int nextWordLength = 0; // TODO: Optimize init suggestion initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, correction); Loading @@ -389,7 +381,7 @@ bool UnigramDictionary::getSubStringSuggestion( int freq = getMostFrequentWordLike( inputWordStartPos, inputWordLength, proximityInfo, mWord); if (freq > 0) { tempOutputWordLength = inputWordLength; nextWordLength = inputWordLength; tempOutputWord = mWord; } else if (!hasAutoCorrectionCandidate) { if (inputWordStartPos > 0) { Loading @@ -400,7 +392,7 @@ bool UnigramDictionary::getSubStringSuggestion( getSuggestionCandidates(useFullEditDistance, inputWordLength, correction, queuePool, false, MAX_ERRORS_FOR_TWO_WORDS, currentWordIndex); if (DEBUG_DICT) { if (currentWordIndex <= SUB_QUEUE_MAX_WORD_INDEX) { if (currentWordIndex < MULTIPLE_WORDS_SUGGESTION_MAX_WORDS) { AKLOGI("Dump word candidates(%d) %d", currentWordIndex, inputWordLength); for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) { queuePool->getSubQueue(currentWordIndex, i)->dumpTopWord(); Loading @@ -415,111 +407,144 @@ bool UnigramDictionary::getSubStringSuggestion( int score = 0; const double ns = queue->getHighestNormalizedScore( proximityInfo->getPrimaryInputWord(), inputWordLength, &tempOutputWord, &score, &tempOutputWordLength); &tempOutputWord, &score, &nextWordLength); if (DEBUG_DICT) { AKLOGI("NS(%d) = %f, Score = %d", currentWordIndex, ns, score); } // Two words correction won't be done if the score of the first word doesn't exceed the // threshold. if (ns < TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD || tempOutputWordLength < SUB_QUEUE_MIN_WORD_LENGTH) { || nextWordLength < SUB_QUEUE_MIN_WORD_LENGTH) { return false; } freq = score >> (tempOutputWordLength + TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER); freq = score >> (nextWordLength + TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER); } if (DEBUG_DICT) { AKLOGI("Freq(%d): %d, length: %d, input length: %d, input start: %d" , currentWordIndex, freq, tempOutputWordLength, inputWordLength, inputWordStartPos); AKLOGI("Freq(%d): %d, length: %d, input length: %d, input start: %d (%d)" , currentWordIndex, freq, nextWordLength, inputWordLength, inputWordStartPos, wordLengthArray[0]); } if (freq <= 0 || tempOutputWordLength <= 0 || MAX_WORD_LENGTH <= (outputWordStartPos + tempOutputWordLength)) { if (freq <= 0 || nextWordLength <= 0 || MAX_WORD_LENGTH <= (outputWordStartPos + nextWordLength)) { return false; } for (int i = 0; i < tempOutputWordLength; ++i) { for (int i = 0; i < nextWordLength; ++i) { outputWord[outputWordStartPos + i] = tempOutputWord[i]; } // Put output values freqArray[currentWordIndex - 1] = freq; freqArray[currentWordIndex] = freq; // TODO: put output length instead of input length wordLengthArray[currentWordIndex - 1] = inputWordLength; *outputWordLength = outputWordStartPos + tempOutputWordLength; wordLengthArray[currentWordIndex] = inputWordLength; const int tempOutputWordLength = outputWordStartPos + nextWordLength; if (outputWordLength) { *outputWordLength = tempOutputWordLength; } if ((inputWordStartPos + inputWordLength) < inputLength) { if (outputWordStartPos + tempOutputWordLength >= MAX_WORD_LENGTH) { if (outputWordStartPos + nextWordLength >= MAX_WORD_LENGTH) { return false; } outputWord[outputWordStartPos + tempOutputWordLength] = SPACE; if (outputWordLength) { ++*outputWordLength; } else if (currentWordIndex >= 2) { } } else if (currentWordIndex >= 1) { // TODO: Handle 3 or more words const int pairFreq = correction->getFreqForSplitTwoWords( freqArray, wordLengthArray, isSpaceProximity, outputWord); if (DEBUG_DICT) { AKLOGI("Split two words: %d, %d, %d, %d", freqArray[0], freqArray[1], pairFreq, inputLength); AKLOGI("Split two words: %d, %d, %d, %d, (%d)", freqArray[0], freqArray[1], pairFreq, inputLength, wordLengthArray[0]); } addWord(outputWord, *outputWordLength, pairFreq, queuePool->getMasterQueue()); addWord(outputWord, tempOutputWordLength, pairFreq, queuePool->getMasterQueue()); } return true; } void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo, void UnigramDictionary::getMultiWordsSuggestionRec(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int inputLength, const int wordDivideIndex, const bool useFullEditDistance, const int inputLength, Correction *correction, WordsPriorityQueuePool* queuePool, const bool hasAutoCorrectionCandidate) { if (inputLength >= MAX_WORD_LENGTH) return; if (DEBUG_DICT) { // MAX_PROXIMITY_CHARS_SIZE in ProximityInfo.java should be 16 assert(MAX_PROXIMITY_CHARS == 16); const bool hasAutoCorrectionCandidate, const int startInputPos, const int startWordIndex, const int outputWordLength, int *freqArray, int* wordLengthArray, unsigned short* outputWord) { if (startWordIndex >= (MULTIPLE_WORDS_SUGGESTION_MAX_WORDS - 1)) { // Return if the last word index return; } // Allocating fixed length array on stack unsigned short outputWord[MAX_WORD_LENGTH]; int freqArray[SUB_QUEUE_MAX_WORD_INDEX]; int wordLengthArray[SUB_QUEUE_MAX_WORD_INDEX]; int outputWordLength = 0; for (int i = 1; i < inputLength; ++i) { int tempOutputWordLength = 0; // First word int inputWordStartPos = 0; int inputWordLength = wordDivideIndex; int inputWordLength = i; if (DEBUG_CORRECTION_FREQ) { AKLOGI("Two words, %d", inputWordLength); } if (!getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes, useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate, FIRST_WORD_INDEX, inputWordStartPos, inputWordLength, 0, true /* not used */, freqArray, wordLengthArray, outputWord, &outputWordLength)) { return; freqArray, wordLengthArray, outputWord, &tempOutputWordLength)) { continue; } const int tempOutputWordLength = outputWordLength; // Second word // Missing space inputWordStartPos = wordDivideIndex; inputWordLength = inputLength - wordDivideIndex; inputWordStartPos = i; inputWordLength = inputLength - i; getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes, useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate, SECOND_WORD_INDEX, inputWordStartPos, inputWordLength, tempOutputWordLength, false /* missing space */, freqArray, wordLengthArray, outputWord, &outputWordLength); false /* missing space */, freqArray, wordLengthArray, outputWord, 0); // Mistyped space ++inputWordStartPos; --inputWordLength; if (inputWordLength <= 0) { return; continue; } const int x = xcoordinates[inputWordStartPos - 1]; const int y = ycoordinates[inputWordStartPos - 1]; if (!proximityInfo->hasSpaceProximity(x, y)) { return; continue; } getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes, useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate, SECOND_WORD_INDEX, inputWordStartPos, inputWordLength, tempOutputWordLength, true /* mistyped space */, freqArray, wordLengthArray, outputWord, &outputWordLength); true /* mistyped space */, freqArray, wordLengthArray, outputWord, 0); } } void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int inputLength, Correction *correction, WordsPriorityQueuePool* queuePool, const bool hasAutoCorrectionCandidate) { if (inputLength >= MAX_WORD_LENGTH) return; if (DEBUG_DICT) { // MAX_PROXIMITY_CHARS_SIZE in ProximityInfo.java should be 16 assert(MAX_PROXIMITY_CHARS == 16); } if (DEBUG_DICT) { AKLOGI("--- Suggest multiple words"); } // Allocating fixed length array on stack unsigned short outputWord[MAX_WORD_LENGTH]; int freqArray[MULTIPLE_WORDS_SUGGESTION_MAX_WORDS]; int wordLengthArray[MULTIPLE_WORDS_SUGGESTION_MAX_WORDS]; const int outputWordLength = 0; const int startInputPos = 0; const int startWordIndex = 0; getMultiWordsSuggestionRec(proximityInfo, xcoordinates, ycoordinates, codes, useFullEditDistance, inputLength, correction, queuePool, hasAutoCorrectionCandidate, startInputPos, startWordIndex, outputWordLength, freqArray, wordLengthArray, outputWord); } // Wrapper for getMostFrequentWordLikeInner, which matches it to the previous Loading native/src/unigram_dictionary.h +8 −1 Original line number Diff line number Diff line Loading @@ -103,7 +103,7 @@ class UnigramDictionary { const int currentWordIndex); void getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int inputLength, const int wordDivideIndex, const bool useFullEditDistance, const int inputLength, Correction *correction, WordsPriorityQueuePool* queuePool, const bool hasAutoCorrectionCandidate); void onTerminal(const int freq, const TerminalAttributes& terminalAttributes, Loading @@ -127,6 +127,13 @@ class UnigramDictionary { const int inputWordStartPos, const int inputWordLength, const int outputWordStartPos, const bool isSpaceProximity, int *freqArray, int *wordLengthArray, unsigned short* outputWord, int *outputWordLength); void getMultiWordsSuggestionRec(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int inputLength, Correction *correction, WordsPriorityQueuePool* queuePool, const bool hasAutoCorrectionCandidate, const int startPos, const int startWordIndex, const int outputWordLength, int *freqArray, int* wordLengthArray, unsigned short* outputWord); const uint8_t* const DICT_ROOT; const int MAX_WORD_LENGTH; Loading native/src/words_priority_queue_pool.h +14 −25 Original line number Diff line number Diff line Loading @@ -27,11 +27,10 @@ class WordsPriorityQueuePool { public: WordsPriorityQueuePool(int mainQueueMaxWords, int subQueueMaxWords, int maxWordLength) { mMasterQueue = new(mMasterQueueBuf) WordsPriorityQueue(mainQueueMaxWords, maxWordLength); for (int i = 0, subQueueBufOffset = 0; i < SUB_QUEUE_MAX_COUNT; for (int i = 0, subQueueBufOffset = 0; i < MULTIPLE_WORDS_SUGGESTION_MAX_WORDS * SUB_QUEUE_MAX_COUNT; ++i, subQueueBufOffset += sizeof(WordsPriorityQueue)) { mSubQueues1[i] = new(mSubQueueBuf1 + subQueueBufOffset) WordsPriorityQueue(subQueueMaxWords, maxWordLength); mSubQueues2[i] = new(mSubQueueBuf2 + subQueueBufOffset) mSubQueues[i] = new(mSubQueueBuf + subQueueBufOffset) WordsPriorityQueue(subQueueMaxWords, maxWordLength); } } Loading @@ -44,7 +43,7 @@ class WordsPriorityQueuePool { } WordsPriorityQueue* getSubQueue(const int wordIndex, const int inputWordLength) { if (wordIndex > SUB_QUEUE_MAX_WORD_INDEX) { if (wordIndex >= MULTIPLE_WORDS_SUGGESTION_MAX_WORDS) { return 0; } if (inputWordLength < 0 || inputWordLength >= SUB_QUEUE_MAX_COUNT) { Loading @@ -53,30 +52,21 @@ class WordsPriorityQueuePool { } return 0; } // TODO: Come up with more generic pool if (wordIndex == 1) { return mSubQueues1[inputWordLength]; } else if (wordIndex == 2) { return mSubQueues2[inputWordLength]; } else { return 0; } return mSubQueues[wordIndex * SUB_QUEUE_MAX_COUNT + inputWordLength]; } inline void clearAll() { mMasterQueue->clear(); for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) { mSubQueues1[i]->clear(); mSubQueues2[i]->clear(); for (int i = 0; i < MULTIPLE_WORDS_SUGGESTION_MAX_WORDS; ++i) { clearSubQueue(i); } } inline void clearSubQueue(const int wordIndex) { for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) { if (wordIndex == 1) { mSubQueues1[i]->clear(); } else if (wordIndex == 2) { mSubQueues2[i]->clear(); WordsPriorityQueue* queue = getSubQueue(wordIndex, i); if (queue) { queue->clear(); } } } Loading @@ -84,17 +74,16 @@ class WordsPriorityQueuePool { void dumpSubQueue1TopSuggestions() { AKLOGI("DUMP SUBQUEUE1 TOP SUGGESTIONS"); for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) { mSubQueues1[i]->dumpTopWord(); getSubQueue(0, i)->dumpTopWord(); } } private: WordsPriorityQueue* mMasterQueue; WordsPriorityQueue* mSubQueues1[SUB_QUEUE_MAX_COUNT]; WordsPriorityQueue* mSubQueues2[SUB_QUEUE_MAX_COUNT]; WordsPriorityQueue* mSubQueues[SUB_QUEUE_MAX_COUNT * MULTIPLE_WORDS_SUGGESTION_MAX_WORDS]; char mMasterQueueBuf[sizeof(WordsPriorityQueue)]; char mSubQueueBuf1[SUB_QUEUE_MAX_COUNT * sizeof(WordsPriorityQueue)]; char mSubQueueBuf2[SUB_QUEUE_MAX_COUNT * sizeof(WordsPriorityQueue)]; char mSubQueueBuf[MULTIPLE_WORDS_SUGGESTION_MAX_WORDS * SUB_QUEUE_MAX_COUNT * sizeof(WordsPriorityQueue)]; }; } Loading Loading
native/src/correction.cpp +6 −5 Original line number Diff line number Diff line Loading @@ -827,11 +827,6 @@ int Correction::RankingAlgorithm::calcFreqForSplitTwoWords( const bool capitalizedWordDemotion = firstCapitalizedWordDemotion ^ secondCapitalizedWordDemotion; if (DEBUG_DICT_FULL) { AKLOGI("Two words: %c, %c, %d", word[0], word[firstWordLength + 1], capitalizedWordDemotion); } if (firstWordLength == 0 || secondWordLength == 0) { return 0; } Loading Loading @@ -891,6 +886,12 @@ int Correction::RankingAlgorithm::calcFreqForSplitTwoWords( multiplyRate(TWO_WORDS_CAPITALIZED_DEMOTION_RATE, &totalFreq); } if (DEBUG_CORRECTION_FREQ) { AKLOGI("Two words (%d, %d) (%d, %d) %d, %d", firstFreq, secondFreq, firstWordLength, secondWordLength, capitalizedWordDemotion, totalFreq); DUMP_WORD(word, firstWordLength); } return totalFreq; } Loading
native/src/defines.h +3 −3 Original line number Diff line number Diff line Loading @@ -216,15 +216,15 @@ static void prof_out(void) { #define SUB_QUEUE_MAX_WORDS 1 #define SUB_QUEUE_MAX_COUNT 10 #define SUB_QUEUE_MIN_WORD_LENGTH 4 #define SUB_QUEUE_MAX_WORD_INDEX 2 #define MULTIPLE_WORDS_SUGGESTION_MAX_WORDS 2 #define TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD 0.39 #define START_TWO_WORDS_CORRECTION_THRESHOLD 0.22 #define MAX_DEPTH_MULTIPLIER 3 #define FIRST_WORD_INDEX 1 #define SECOND_WORD_INDEX 2 #define FIRST_WORD_INDEX 0 #define SECOND_WORD_INDEX 1 // TODO: Reduce this constant if possible; check the maximum number of umlauts in the same German // word in the dictionary Loading
native/src/unigram_dictionary.cpp +101 −76 Original line number Diff line number Diff line Loading @@ -224,15 +224,10 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, // Multiple word suggestions if (SUGGEST_MULTIPLE_WORDS && inputLength >= MIN_USER_TYPED_LENGTH_FOR_MULTIPLE_WORD_SUGGESTION) { for (int i = 1; i < inputLength; ++i) { if (DEBUG_DICT) { AKLOGI("--- Suggest multiple words %d", i); } getSplitTwoWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, useFullEditDistance, inputLength, i, correction, queuePool, useFullEditDistance, inputLength, correction, queuePool, hasAutoCorrectionCandidate); } } PROF_END(5); PROF_START(6); Loading Loading @@ -329,7 +324,7 @@ inline void UnigramDictionary::onTerminal(const int freq, int wordLength; unsigned short* wordPointer; if ((currentWordIndex == 1) && addToMasterQueue) { if ((currentWordIndex == FIRST_WORD_INDEX) && addToMasterQueue) { WordsPriorityQueue *masterQueue = queuePool->getMasterQueue(); const int finalFreq = correction->getFinalFreq(freq, &wordPointer, &wordLength); if (finalFreq != NOT_A_FREQUENCY) { Loading Loading @@ -377,11 +372,8 @@ bool UnigramDictionary::getSubStringSuggestion( const int inputWordStartPos, const int inputWordLength, const int outputWordStartPos, const bool isSpaceProximity, int *freqArray, int*wordLengthArray, unsigned short* outputWord, int *outputWordLength) { if (DEBUG_DICT) { assert(currentWordIndex >= 1); } unsigned short* tempOutputWord = 0; int tempOutputWordLength = 0; int nextWordLength = 0; // TODO: Optimize init suggestion initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, inputLength, correction); Loading @@ -389,7 +381,7 @@ bool UnigramDictionary::getSubStringSuggestion( int freq = getMostFrequentWordLike( inputWordStartPos, inputWordLength, proximityInfo, mWord); if (freq > 0) { tempOutputWordLength = inputWordLength; nextWordLength = inputWordLength; tempOutputWord = mWord; } else if (!hasAutoCorrectionCandidate) { if (inputWordStartPos > 0) { Loading @@ -400,7 +392,7 @@ bool UnigramDictionary::getSubStringSuggestion( getSuggestionCandidates(useFullEditDistance, inputWordLength, correction, queuePool, false, MAX_ERRORS_FOR_TWO_WORDS, currentWordIndex); if (DEBUG_DICT) { if (currentWordIndex <= SUB_QUEUE_MAX_WORD_INDEX) { if (currentWordIndex < MULTIPLE_WORDS_SUGGESTION_MAX_WORDS) { AKLOGI("Dump word candidates(%d) %d", currentWordIndex, inputWordLength); for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) { queuePool->getSubQueue(currentWordIndex, i)->dumpTopWord(); Loading @@ -415,111 +407,144 @@ bool UnigramDictionary::getSubStringSuggestion( int score = 0; const double ns = queue->getHighestNormalizedScore( proximityInfo->getPrimaryInputWord(), inputWordLength, &tempOutputWord, &score, &tempOutputWordLength); &tempOutputWord, &score, &nextWordLength); if (DEBUG_DICT) { AKLOGI("NS(%d) = %f, Score = %d", currentWordIndex, ns, score); } // Two words correction won't be done if the score of the first word doesn't exceed the // threshold. if (ns < TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD || tempOutputWordLength < SUB_QUEUE_MIN_WORD_LENGTH) { || nextWordLength < SUB_QUEUE_MIN_WORD_LENGTH) { return false; } freq = score >> (tempOutputWordLength + TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER); freq = score >> (nextWordLength + TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER); } if (DEBUG_DICT) { AKLOGI("Freq(%d): %d, length: %d, input length: %d, input start: %d" , currentWordIndex, freq, tempOutputWordLength, inputWordLength, inputWordStartPos); AKLOGI("Freq(%d): %d, length: %d, input length: %d, input start: %d (%d)" , currentWordIndex, freq, nextWordLength, inputWordLength, inputWordStartPos, wordLengthArray[0]); } if (freq <= 0 || tempOutputWordLength <= 0 || MAX_WORD_LENGTH <= (outputWordStartPos + tempOutputWordLength)) { if (freq <= 0 || nextWordLength <= 0 || MAX_WORD_LENGTH <= (outputWordStartPos + nextWordLength)) { return false; } for (int i = 0; i < tempOutputWordLength; ++i) { for (int i = 0; i < nextWordLength; ++i) { outputWord[outputWordStartPos + i] = tempOutputWord[i]; } // Put output values freqArray[currentWordIndex - 1] = freq; freqArray[currentWordIndex] = freq; // TODO: put output length instead of input length wordLengthArray[currentWordIndex - 1] = inputWordLength; *outputWordLength = outputWordStartPos + tempOutputWordLength; wordLengthArray[currentWordIndex] = inputWordLength; const int tempOutputWordLength = outputWordStartPos + nextWordLength; if (outputWordLength) { *outputWordLength = tempOutputWordLength; } if ((inputWordStartPos + inputWordLength) < inputLength) { if (outputWordStartPos + tempOutputWordLength >= MAX_WORD_LENGTH) { if (outputWordStartPos + nextWordLength >= MAX_WORD_LENGTH) { return false; } outputWord[outputWordStartPos + tempOutputWordLength] = SPACE; if (outputWordLength) { ++*outputWordLength; } else if (currentWordIndex >= 2) { } } else if (currentWordIndex >= 1) { // TODO: Handle 3 or more words const int pairFreq = correction->getFreqForSplitTwoWords( freqArray, wordLengthArray, isSpaceProximity, outputWord); if (DEBUG_DICT) { AKLOGI("Split two words: %d, %d, %d, %d", freqArray[0], freqArray[1], pairFreq, inputLength); AKLOGI("Split two words: %d, %d, %d, %d, (%d)", freqArray[0], freqArray[1], pairFreq, inputLength, wordLengthArray[0]); } addWord(outputWord, *outputWordLength, pairFreq, queuePool->getMasterQueue()); addWord(outputWord, tempOutputWordLength, pairFreq, queuePool->getMasterQueue()); } return true; } void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo, void UnigramDictionary::getMultiWordsSuggestionRec(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int inputLength, const int wordDivideIndex, const bool useFullEditDistance, const int inputLength, Correction *correction, WordsPriorityQueuePool* queuePool, const bool hasAutoCorrectionCandidate) { if (inputLength >= MAX_WORD_LENGTH) return; if (DEBUG_DICT) { // MAX_PROXIMITY_CHARS_SIZE in ProximityInfo.java should be 16 assert(MAX_PROXIMITY_CHARS == 16); const bool hasAutoCorrectionCandidate, const int startInputPos, const int startWordIndex, const int outputWordLength, int *freqArray, int* wordLengthArray, unsigned short* outputWord) { if (startWordIndex >= (MULTIPLE_WORDS_SUGGESTION_MAX_WORDS - 1)) { // Return if the last word index return; } // Allocating fixed length array on stack unsigned short outputWord[MAX_WORD_LENGTH]; int freqArray[SUB_QUEUE_MAX_WORD_INDEX]; int wordLengthArray[SUB_QUEUE_MAX_WORD_INDEX]; int outputWordLength = 0; for (int i = 1; i < inputLength; ++i) { int tempOutputWordLength = 0; // First word int inputWordStartPos = 0; int inputWordLength = wordDivideIndex; int inputWordLength = i; if (DEBUG_CORRECTION_FREQ) { AKLOGI("Two words, %d", inputWordLength); } if (!getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes, useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate, FIRST_WORD_INDEX, inputWordStartPos, inputWordLength, 0, true /* not used */, freqArray, wordLengthArray, outputWord, &outputWordLength)) { return; freqArray, wordLengthArray, outputWord, &tempOutputWordLength)) { continue; } const int tempOutputWordLength = outputWordLength; // Second word // Missing space inputWordStartPos = wordDivideIndex; inputWordLength = inputLength - wordDivideIndex; inputWordStartPos = i; inputWordLength = inputLength - i; getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes, useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate, SECOND_WORD_INDEX, inputWordStartPos, inputWordLength, tempOutputWordLength, false /* missing space */, freqArray, wordLengthArray, outputWord, &outputWordLength); false /* missing space */, freqArray, wordLengthArray, outputWord, 0); // Mistyped space ++inputWordStartPos; --inputWordLength; if (inputWordLength <= 0) { return; continue; } const int x = xcoordinates[inputWordStartPos - 1]; const int y = ycoordinates[inputWordStartPos - 1]; if (!proximityInfo->hasSpaceProximity(x, y)) { return; continue; } getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes, useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate, SECOND_WORD_INDEX, inputWordStartPos, inputWordLength, tempOutputWordLength, true /* mistyped space */, freqArray, wordLengthArray, outputWord, &outputWordLength); true /* mistyped space */, freqArray, wordLengthArray, outputWord, 0); } } void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int inputLength, Correction *correction, WordsPriorityQueuePool* queuePool, const bool hasAutoCorrectionCandidate) { if (inputLength >= MAX_WORD_LENGTH) return; if (DEBUG_DICT) { // MAX_PROXIMITY_CHARS_SIZE in ProximityInfo.java should be 16 assert(MAX_PROXIMITY_CHARS == 16); } if (DEBUG_DICT) { AKLOGI("--- Suggest multiple words"); } // Allocating fixed length array on stack unsigned short outputWord[MAX_WORD_LENGTH]; int freqArray[MULTIPLE_WORDS_SUGGESTION_MAX_WORDS]; int wordLengthArray[MULTIPLE_WORDS_SUGGESTION_MAX_WORDS]; const int outputWordLength = 0; const int startInputPos = 0; const int startWordIndex = 0; getMultiWordsSuggestionRec(proximityInfo, xcoordinates, ycoordinates, codes, useFullEditDistance, inputLength, correction, queuePool, hasAutoCorrectionCandidate, startInputPos, startWordIndex, outputWordLength, freqArray, wordLengthArray, outputWord); } // Wrapper for getMostFrequentWordLikeInner, which matches it to the previous Loading
native/src/unigram_dictionary.h +8 −1 Original line number Diff line number Diff line Loading @@ -103,7 +103,7 @@ class UnigramDictionary { const int currentWordIndex); void getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int inputLength, const int wordDivideIndex, const bool useFullEditDistance, const int inputLength, Correction *correction, WordsPriorityQueuePool* queuePool, const bool hasAutoCorrectionCandidate); void onTerminal(const int freq, const TerminalAttributes& terminalAttributes, Loading @@ -127,6 +127,13 @@ class UnigramDictionary { const int inputWordStartPos, const int inputWordLength, const int outputWordStartPos, const bool isSpaceProximity, int *freqArray, int *wordLengthArray, unsigned short* outputWord, int *outputWordLength); void getMultiWordsSuggestionRec(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int inputLength, Correction *correction, WordsPriorityQueuePool* queuePool, const bool hasAutoCorrectionCandidate, const int startPos, const int startWordIndex, const int outputWordLength, int *freqArray, int* wordLengthArray, unsigned short* outputWord); const uint8_t* const DICT_ROOT; const int MAX_WORD_LENGTH; Loading
native/src/words_priority_queue_pool.h +14 −25 Original line number Diff line number Diff line Loading @@ -27,11 +27,10 @@ class WordsPriorityQueuePool { public: WordsPriorityQueuePool(int mainQueueMaxWords, int subQueueMaxWords, int maxWordLength) { mMasterQueue = new(mMasterQueueBuf) WordsPriorityQueue(mainQueueMaxWords, maxWordLength); for (int i = 0, subQueueBufOffset = 0; i < SUB_QUEUE_MAX_COUNT; for (int i = 0, subQueueBufOffset = 0; i < MULTIPLE_WORDS_SUGGESTION_MAX_WORDS * SUB_QUEUE_MAX_COUNT; ++i, subQueueBufOffset += sizeof(WordsPriorityQueue)) { mSubQueues1[i] = new(mSubQueueBuf1 + subQueueBufOffset) WordsPriorityQueue(subQueueMaxWords, maxWordLength); mSubQueues2[i] = new(mSubQueueBuf2 + subQueueBufOffset) mSubQueues[i] = new(mSubQueueBuf + subQueueBufOffset) WordsPriorityQueue(subQueueMaxWords, maxWordLength); } } Loading @@ -44,7 +43,7 @@ class WordsPriorityQueuePool { } WordsPriorityQueue* getSubQueue(const int wordIndex, const int inputWordLength) { if (wordIndex > SUB_QUEUE_MAX_WORD_INDEX) { if (wordIndex >= MULTIPLE_WORDS_SUGGESTION_MAX_WORDS) { return 0; } if (inputWordLength < 0 || inputWordLength >= SUB_QUEUE_MAX_COUNT) { Loading @@ -53,30 +52,21 @@ class WordsPriorityQueuePool { } return 0; } // TODO: Come up with more generic pool if (wordIndex == 1) { return mSubQueues1[inputWordLength]; } else if (wordIndex == 2) { return mSubQueues2[inputWordLength]; } else { return 0; } return mSubQueues[wordIndex * SUB_QUEUE_MAX_COUNT + inputWordLength]; } inline void clearAll() { mMasterQueue->clear(); for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) { mSubQueues1[i]->clear(); mSubQueues2[i]->clear(); for (int i = 0; i < MULTIPLE_WORDS_SUGGESTION_MAX_WORDS; ++i) { clearSubQueue(i); } } inline void clearSubQueue(const int wordIndex) { for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) { if (wordIndex == 1) { mSubQueues1[i]->clear(); } else if (wordIndex == 2) { mSubQueues2[i]->clear(); WordsPriorityQueue* queue = getSubQueue(wordIndex, i); if (queue) { queue->clear(); } } } Loading @@ -84,17 +74,16 @@ class WordsPriorityQueuePool { void dumpSubQueue1TopSuggestions() { AKLOGI("DUMP SUBQUEUE1 TOP SUGGESTIONS"); for (int i = 0; i < SUB_QUEUE_MAX_COUNT; ++i) { mSubQueues1[i]->dumpTopWord(); getSubQueue(0, i)->dumpTopWord(); } } private: WordsPriorityQueue* mMasterQueue; WordsPriorityQueue* mSubQueues1[SUB_QUEUE_MAX_COUNT]; WordsPriorityQueue* mSubQueues2[SUB_QUEUE_MAX_COUNT]; WordsPriorityQueue* mSubQueues[SUB_QUEUE_MAX_COUNT * MULTIPLE_WORDS_SUGGESTION_MAX_WORDS]; char mMasterQueueBuf[sizeof(WordsPriorityQueue)]; char mSubQueueBuf1[SUB_QUEUE_MAX_COUNT * sizeof(WordsPriorityQueue)]; char mSubQueueBuf2[SUB_QUEUE_MAX_COUNT * sizeof(WordsPriorityQueue)]; char mSubQueueBuf[MULTIPLE_WORDS_SUGGESTION_MAX_WORDS * SUB_QUEUE_MAX_COUNT * sizeof(WordsPriorityQueue)]; }; } Loading