Loading native/src/correction.cpp +40 −27 Original line number Original line Diff line number Diff line Loading @@ -39,15 +39,15 @@ inline static void initEditDistance(int *editDistanceTable) { } } } } inline static void dumpEditDistance10ForDebug(int *editDistanceTable, const int inputLength, inline static void dumpEditDistance10ForDebug(int *editDistanceTable, const int outputLength) { const int editDistanceTableWidth, const int outputLength) { if (DEBUG_DICT) { if (DEBUG_DICT) { AKLOGI("EditDistanceTable"); AKLOGI("EditDistanceTable"); for (int i = 0; i <= 10; ++i) { for (int i = 0; i <= 10; ++i) { int c[11]; int c[11]; for (int j = 0; j <= 10; ++j) { for (int j = 0; j <= 10; ++j) { if (j < inputLength + 1 && i < outputLength + 1) { if (j < editDistanceTableWidth + 1 && i < outputLength + 1) { c[j] = (editDistanceTable + i * (inputLength + 1))[j]; c[j] = (editDistanceTable + i * (editDistanceTableWidth + 1))[j]; } else { } else { c[j] = -1; c[j] = -1; } } Loading Loading @@ -81,12 +81,12 @@ inline static void calcEditDistanceOneStep(int *editDistanceTable, const unsigne } } } } inline static int getCurrentEditDistance( inline static int getCurrentEditDistance(int *editDistanceTable, const int editDistanceTableWidth, int *editDistanceTable, const int inputLength, const int outputLength) { const int outputLength, const int inputLength) { if (DEBUG_EDIT_DISTANCE) { if (DEBUG_EDIT_DISTANCE) { AKLOGI("getCurrentEditDistance %d, %d", inputLength, outputLength); AKLOGI("getCurrentEditDistance %d, %d", inputLength, outputLength); } } return editDistanceTable[(inputLength + 1) * (outputLength + 1) - 1]; return editDistanceTable[(editDistanceTableWidth + 1) * (outputLength) + inputLength]; } } ////////////////////// ////////////////////// Loading Loading @@ -165,6 +165,16 @@ int Correction::getFreqForSplitTwoWords(const int firstFreq, const int secondFre } } int Correction::getFinalFreq(const int freq, unsigned short **word, int *wordLength) { int Correction::getFinalFreq(const int freq, unsigned short **word, int *wordLength) { return getFinalFreqInternal(freq, word, wordLength, mInputLength); } int Correction::getFinalFreqForSubQueue(const int freq, unsigned short **word, int *wordLength, const int inputLength) { return getFinalFreqInternal(freq, word, wordLength, inputLength); } int Correction::getFinalFreqInternal(const int freq, unsigned short **word, int *wordLength, const int inputLength) { const int outputIndex = mTerminalOutputIndex; const int outputIndex = mTerminalOutputIndex; const int inputIndex = mTerminalInputIndex; const int inputIndex = mTerminalInputIndex; *wordLength = outputIndex + 1; *wordLength = outputIndex + 1; Loading @@ -173,8 +183,9 @@ int Correction::getFinalFreq(const int freq, unsigned short **word, int *wordLen } } *word = mWord; *word = mWord; return Correction::RankingAlgorithm::calculateFinalFreq( int finalFreq = Correction::RankingAlgorithm::calculateFinalFreq( inputIndex, outputIndex, freq, mEditDistanceTable, this); inputIndex, outputIndex, freq, mEditDistanceTable, this, inputLength); return finalFreq; } } bool Correction::initProcessState(const int outputIndex) { bool Correction::initProcessState(const int outputIndex) { Loading Loading @@ -613,9 +624,9 @@ inline static bool isUpperCase(unsigned short c) { /* static */ /* static */ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const int outputIndex, int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const int outputIndex, const int freq, int* editDistanceTable, const Correction* correction) { const int freq, int* editDistanceTable, const Correction* correction, const int inputLength) { const int excessivePos = correction->getExcessivePos(); const int excessivePos = correction->getExcessivePos(); const int inputLength = correction->mInputLength; const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER; const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER; const int fullWordMultiplier = correction->FULL_WORD_MULTIPLIER; const int fullWordMultiplier = correction->FULL_WORD_MULTIPLIER; const ProximityInfo *proximityInfo = correction->mProximityInfo; const ProximityInfo *proximityInfo = correction->mProximityInfo; Loading @@ -640,13 +651,13 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const const unsigned short* word = correction->mWord; const unsigned short* word = correction->mWord; const bool skipped = skippedCount > 0; const bool skipped = skippedCount > 0; const int quoteDiffCount = max(0, getQuoteCount(word, outputIndex + 1) const int quoteDiffCount = max(0, getQuoteCount(word, outputLength) - getQuoteCount(proximityInfo->getPrimaryInputWord(), inputLength)); - getQuoteCount(proximityInfo->getPrimaryInputWord(), inputLength)); // TODO: Calculate edit distance for transposed and excessive // TODO: Calculate edit distance for transposed and excessive int ed = 0; int ed = 0; if (DEBUG_DICT_FULL) { if (DEBUG_DICT_FULL) { dumpEditDistance10ForDebug(editDistanceTable, inputLength, outputIndex + 1); dumpEditDistance10ForDebug(editDistanceTable, correction->mInputLength, outputLength); } } int adjustedProximityMatchedCount = proximityMatchedCount; int adjustedProximityMatchedCount = proximityMatchedCount; Loading @@ -654,22 +665,22 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const // TODO: Optimize this. // TODO: Optimize this. if (transposedCount > 0 || proximityMatchedCount > 0 || skipped || excessiveCount > 0) { if (transposedCount > 0 || proximityMatchedCount > 0 || skipped || excessiveCount > 0) { ed = getCurrentEditDistance(editDistanceTable, inputLength, outputIndex + 1) ed = getCurrentEditDistance(editDistanceTable, correction->mInputLength, outputLength, - transposedCount; inputLength) - transposedCount; const int matchWeight = powerIntCapped(typedLetterMultiplier, const int matchWeight = powerIntCapped(typedLetterMultiplier, max(inputLength, outputIndex + 1) - ed); max(inputLength, outputLength) - ed); multiplyIntCapped(matchWeight, &finalFreq); multiplyIntCapped(matchWeight, &finalFreq); // TODO: Demote further if there are two or more excessive chars with longer user input? // TODO: Demote further if there are two or more excessive chars with longer user input? if (inputLength > outputIndex + 1) { if (inputLength > outputLength) { multiplyRate(INPUT_EXCEEDS_OUTPUT_DEMOTION_RATE, &finalFreq); multiplyRate(INPUT_EXCEEDS_OUTPUT_DEMOTION_RATE, &finalFreq); } } ed = max(0, ed - quoteDiffCount); ed = max(0, ed - quoteDiffCount); if (transposedCount < 1) { if (transposedCount < 1) { if (ed == 1 && (inputLength == outputIndex || inputLength == outputIndex + 2)) { if (ed == 1 && (inputLength == outputLength - 1 || inputLength == outputLength + 1)) { // Promote a word with just one skipped or excessive char // Promote a word with just one skipped or excessive char if (sameLength) { if (sameLength) { multiplyRate(WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE, &finalFreq); multiplyRate(WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE, &finalFreq); Loading @@ -681,7 +692,7 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const sameLength = true; sameLength = true; } } } } adjustedProximityMatchedCount = min(max(0, ed - (outputIndex + 1 - inputLength)), adjustedProximityMatchedCount = min(max(0, ed - (outputLength - inputLength)), proximityMatchedCount); proximityMatchedCount); } else { } else { const int matchWeight = powerIntCapped(typedLetterMultiplier, matchCount); const int matchWeight = powerIntCapped(typedLetterMultiplier, matchCount); Loading Loading @@ -783,7 +794,8 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const // Promotion for an exactly matched word // Promotion for an exactly matched word if (ed == 0) { if (ed == 0) { // Full exact match // Full exact match if (sameLength && transposedCount == 0 && !skipped && excessiveCount == 0) { if (sameLength && transposedCount == 0 && !skipped && excessiveCount == 0 && quoteDiffCount == 0) { finalFreq = capped255MultForFullMatchAccentsOrCapitalizationDifference(finalFreq); finalFreq = capped255MultForFullMatchAccentsOrCapitalizationDifference(finalFreq); } } } } Loading Loading @@ -828,14 +840,14 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const } } if (DEBUG_DICT_FULL) { if (DEBUG_DICT_FULL) { AKLOGI("calc: %d, %d", outputIndex, sameLength); AKLOGI("calc: %d, %d", outputLength, sameLength); } } if (DEBUG_CORRECTION_FREQ) { if (DEBUG_CORRECTION_FREQ) { DUMP_WORD(correction->mWord, outputIndex + 1); DUMP_WORD(correction->mWord, outputLength); AKLOGI("FinalFreq: [P%d, S%d, T%d, E%d] %d, %d, %d, %d, %d", proximityMatchedCount, AKLOGI("FinalFreq: [P%d, S%d, T%d, E%d] %d, %d, %d, %d, %d, %d", proximityMatchedCount, skippedCount, transposedCount, excessiveCount, lastCharExceeded, sameLength, skippedCount, transposedCount, excessiveCount, outputLength, lastCharExceeded, quoteDiffCount, ed, finalFreq); sameLength, quoteDiffCount, ed, finalFreq); } } return finalFreq; return finalFreq; Loading Loading @@ -881,11 +893,12 @@ int Correction::RankingAlgorithm::calcFreqForSplitTwoWords( if (firstWordLength == 0 || secondWordLength == 0) { if (firstWordLength == 0 || secondWordLength == 0) { return 0; return 0; } } const int firstDemotionRate = 100 - 100 / (firstWordLength + 1); const int firstDemotionRate = 100 - TWO_WORDS_CORRECTION_DEMOTION_BASE / (firstWordLength + 1); int tempFirstFreq = firstFreq; int tempFirstFreq = firstFreq; multiplyRate(firstDemotionRate, &tempFirstFreq); multiplyRate(firstDemotionRate, &tempFirstFreq); const int secondDemotionRate = 100 - 100 / (secondWordLength + 1); const int secondDemotionRate = 100 - TWO_WORDS_CORRECTION_DEMOTION_BASE / (secondWordLength + 1); int tempSecondFreq = secondFreq; int tempSecondFreq = secondFreq; multiplyRate(secondDemotionRate, &tempSecondFreq); multiplyRate(secondDemotionRate, &tempSecondFreq); Loading native/src/correction.h +6 −1 Original line number Original line Diff line number Diff line Loading @@ -75,6 +75,8 @@ class Correction { int getFreqForSplitTwoWords( int getFreqForSplitTwoWords( const int firstFreq, const int secondFreq, const unsigned short *word); const int firstFreq, const int secondFreq, const unsigned short *word); int getFinalFreq(const int freq, unsigned short **word, int* wordLength); int getFinalFreq(const int freq, unsigned short **word, int* wordLength); int getFinalFreqForSubQueue(const int freq, unsigned short **word, int* wordLength, const int inputLength); CorrectionType processCharAndCalcState(const int32_t c, const bool isTerminal); CorrectionType processCharAndCalcState(const int32_t c, const bool isTerminal); Loading @@ -97,7 +99,8 @@ class Correction { class RankingAlgorithm { class RankingAlgorithm { public: public: static int calculateFinalFreq(const int inputIndex, const int depth, static int calculateFinalFreq(const int inputIndex, const int depth, const int freq, int *editDistanceTable, const Correction* correction); const int freq, int *editDistanceTable, const Correction* correction, const int inputLength); static int calcFreqForSplitTwoWords(const int firstFreq, const int secondFreq, static int calcFreqForSplitTwoWords(const int firstFreq, const int secondFreq, const Correction* correction, const unsigned short *word); const Correction* correction, const unsigned short *word); static int calcFreqForSplitTwoWordsOld(const int firstFreq, const int secondFreq, static int calcFreqForSplitTwoWordsOld(const int firstFreq, const int secondFreq, Loading @@ -122,6 +125,8 @@ class Correction { const int32_t c, const bool isTerminal, const bool inputIndexIncremented); const int32_t c, const bool isTerminal, const bool inputIndexIncremented); inline CorrectionType processUnrelatedCorrectionType(); inline CorrectionType processUnrelatedCorrectionType(); inline void addCharToCurrentWord(const int32_t c); inline void addCharToCurrentWord(const int32_t c); inline int getFinalFreqInternal(const int freq, unsigned short **word, int* wordLength, const int inputLength); const int TYPED_LETTER_MULTIPLIER; const int TYPED_LETTER_MULTIPLIER; const int FULL_WORD_MULTIPLIER; const int FULL_WORD_MULTIPLIER; Loading native/src/defines.h +6 −2 Original line number Original line Diff line number Diff line Loading @@ -187,7 +187,7 @@ static void prof_out(void) { // The following "rate"s are used as a multiplier before dividing by 100, so they are in percent. // The following "rate"s are used as a multiplier before dividing by 100, so they are in percent. #define WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE 80 #define WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE 80 #define WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X 12 #define WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X 12 #define WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE 67 #define WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE 58 #define WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE 75 #define WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE 75 #define WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE 75 #define WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE 75 #define WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE 70 #define WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE 70 Loading @@ -199,6 +199,8 @@ static void prof_out(void) { #define INPUT_EXCEEDS_OUTPUT_DEMOTION_RATE 70 #define INPUT_EXCEEDS_OUTPUT_DEMOTION_RATE 70 #define FIRST_CHAR_DIFFERENT_DEMOTION_RATE 96 #define FIRST_CHAR_DIFFERENT_DEMOTION_RATE 96 #define TWO_WORDS_CAPITALIZED_DEMOTION_RATE 50 #define TWO_WORDS_CAPITALIZED_DEMOTION_RATE 50 #define TWO_WORDS_CORRECTION_DEMOTION_BASE 80 #define TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER 1 #define ZERO_DISTANCE_PROMOTION_RATE 110 #define ZERO_DISTANCE_PROMOTION_RATE 110 #define NEUTRAL_SCORE_SQUARED_RADIUS 8.0f #define NEUTRAL_SCORE_SQUARED_RADIUS 8.0f #define HALF_SCORE_SQUARED_RADIUS 32.0f #define HALF_SCORE_SQUARED_RADIUS 32.0f Loading @@ -212,8 +214,10 @@ static void prof_out(void) { // Holds up to 1 candidate for each word // Holds up to 1 candidate for each word #define SUB_QUEUE_MAX_WORDS 1 #define SUB_QUEUE_MAX_WORDS 1 #define SUB_QUEUE_MAX_COUNT 10 #define SUB_QUEUE_MAX_COUNT 10 #define SUB_QUEUE_MIN_WORD_LENGTH 4 #define TWO_WORDS_CORRECTION_THRESHOLD 0.22f #define TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD 0.39 #define START_TWO_WORDS_CORRECTION_THRESHOLD 0.22 #define MAX_DEPTH_MULTIPLIER 3 #define MAX_DEPTH_MULTIPLIER 3 Loading native/src/unigram_dictionary.cpp +73 −37 Original line number Original line Diff line number Diff line Loading @@ -254,7 +254,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, proximityInfo->getPrimaryInputWord(), i, word, wordLength, score); proximityInfo->getPrimaryInputWord(), i, word, wordLength, score); ns += 0; ns += 0; AKLOGI("--- TOP SUB WORDS for %d --- %d %f [%d]", i, score, ns, AKLOGI("--- TOP SUB WORDS for %d --- %d %f [%d]", i, score, ns, (ns > TWO_WORDS_CORRECTION_THRESHOLD)); (ns > TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD)); DUMP_WORD(proximityInfo->getPrimaryInputWord(), i); DUMP_WORD(proximityInfo->getPrimaryInputWord(), i); DUMP_WORD(word, wordLength); DUMP_WORD(word, wordLength); } } Loading Loading @@ -343,37 +343,27 @@ inline void UnigramDictionary::onTerminal(const int freq, WordsPriorityQueuePool *queuePool, const bool addToMasterQueue) { WordsPriorityQueuePool *queuePool, const bool addToMasterQueue) { const int inputIndex = correction->getInputIndex(); const int inputIndex = correction->getInputIndex(); const bool addToSubQueue = inputIndex < SUB_QUEUE_MAX_COUNT; const bool addToSubQueue = inputIndex < SUB_QUEUE_MAX_COUNT; if (!addToMasterQueue && !addToSubQueue) { return; } WordsPriorityQueue *masterQueue = queuePool->getMasterQueue(); WordsPriorityQueue *subQueue = queuePool->getSubQueue1(inputIndex); int wordLength; int wordLength; unsigned short* wordPointer; unsigned short* wordPointer; if (addToMasterQueue) { WordsPriorityQueue *masterQueue = queuePool->getMasterQueue(); const int finalFreq = correction->getFinalFreq(freq, &wordPointer, &wordLength); const int finalFreq = correction->getFinalFreq(freq, &wordPointer, &wordLength); if (finalFreq != NOT_A_FREQUENCY) { if (finalFreq != NOT_A_FREQUENCY) { if (!terminalAttributes.isShortcutOnly()) { if (!terminalAttributes.isShortcutOnly()) { if (addToMasterQueue) { addWord(wordPointer, wordLength, finalFreq, masterQueue); addWord(wordPointer, wordLength, finalFreq, masterQueue); } } // TODO: Check the validity of "inputIndex == wordLength" //if (addToSubQueue && inputIndex == wordLength) { if (addToSubQueue) { addWord(wordPointer, wordLength, finalFreq, subQueue); } } // Please note that the shortcut candidates will be added to the master queue only. if (!addToMasterQueue) { return; } // From here, below is the code to add shortcut candidates. // Please note that the shortcut candidates will be added to the master queue only. TerminalAttributes::ShortcutIterator iterator = terminalAttributes.getShortcutIterator(); TerminalAttributes::ShortcutIterator iterator = terminalAttributes.getShortcutIterator(); while (iterator.hasNextShortcutTarget()) { while (iterator.hasNextShortcutTarget()) { // TODO: addWord only supports weak ordering, meaning we have no means to control the // TODO: addWord only supports weak ordering, meaning we have no means // order of the shortcuts relative to one another or to the word. We need to either // to control the order of the shortcuts relative to one another or to the word. // modulate the frequency of each shortcut according to its own shortcut frequency or // We need to either modulate the frequency of each shortcut according // to make the queue so that the insert order is protected inside the queue for words // to its own shortcut frequency or to make the queue // so that the insert order is protected inside the queue for words // with the same score. // with the same score. uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL]; uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL]; const int shortcutTargetStringLength = iterator.getNextShortcutTarget( const int shortcutTargetStringLength = iterator.getNextShortcutTarget( Loading @@ -383,6 +373,18 @@ inline void UnigramDictionary::onTerminal(const int freq, } } } } // We only allow two words + other error correction for words with SUB_QUEUE_MIN_WORD_LENGTH // or more length. if (inputIndex >= SUB_QUEUE_MIN_WORD_LENGTH && addToSubQueue) { // TODO: Check the validity of "inputIndex == wordLength" //if (addToSubQueue && inputIndex == wordLength) { WordsPriorityQueue *subQueue = queuePool->getSubQueue1(inputIndex); const int finalFreq = correction->getFinalFreqForSubQueue(freq, &wordPointer, &wordLength, inputIndex); addWord(wordPointer, wordLength, finalFreq, subQueue); } } void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo, void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const int *xcoordinates, const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int inputLength, const int missingSpacePos, const bool useFullEditDistance, const int inputLength, const int missingSpacePos, Loading @@ -397,20 +399,57 @@ void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo } } const bool isSpaceProximity = spaceProximityPos >= 0; const bool isSpaceProximity = spaceProximityPos >= 0; const int firstWordStartPos = 0; const int firstWordStartPos = 0; const int firstTypedWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos; int firstFreq = getMostFrequentWordLike(0, firstTypedWordLength, proximityInfo, mWord); unsigned short* firstWord = 0; int firstWordLength = 0; if (firstFreq > 0) { firstWordLength = firstTypedWordLength; firstWord = mWord; } else { if (masterQueue->size() > 0) { double nsForMaster = masterQueue->getHighestNormalizedScore( proximityInfo->getPrimaryInputWord(), inputLength, 0, 0, 0); if (nsForMaster > START_TWO_WORDS_CORRECTION_THRESHOLD) { // Do nothing if the highest suggestion exceeds the threshold. return; } } WordsPriorityQueue* firstWordQueue = queuePool->getSubQueue1(firstTypedWordLength); if (firstWordQueue->size() < 1) { return; } int score = 0; const double ns = firstWordQueue->getHighestNormalizedScore( proximityInfo->getPrimaryInputWord(), firstTypedWordLength, &firstWord, &score, &firstWordLength); // Two words correction won't be done if the score of the first word doesn't exceed the // threshold. if (ns < TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD) { return; } firstFreq = score >> (firstWordLength + TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER); } if (firstFreq <= 0) { return; } const int secondWordStartPos = isSpaceProximity ? (spaceProximityPos + 1) : missingSpacePos; const int secondWordStartPos = isSpaceProximity ? (spaceProximityPos + 1) : missingSpacePos; const int firstWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos; const int secondWordLength = isSpaceProximity const int secondWordLength = isSpaceProximity ? (inputLength - spaceProximityPos - 1) ? (inputLength - spaceProximityPos - 1) : (inputLength - missingSpacePos); : (inputLength - missingSpacePos); if (inputLength >= MAX_WORD_LENGTH) return; if (inputLength >= MAX_WORD_LENGTH) return; if (0 >= firstWordLength || 0 >= secondWordLength || firstWordStartPos >= secondWordStartPos if (0 >= firstWordLength || 0 >= secondWordLength || firstWordStartPos >= secondWordStartPos || firstWordStartPos < 0 || secondWordStartPos + secondWordLength > inputLength) || firstWordStartPos < 0 || secondWordStartPos + secondWordLength > inputLength) return; return; const int newWordLength = firstWordLength + secondWordLength + 1; const int newWordLength = firstWordLength + secondWordLength + 1; // Space proximity preparation // Space proximity preparation //WordsPriorityQueue *subQueue = queuePool->getSubQueue1(); //WordsPriorityQueue *subQueue = queuePool->getSubQueue1(); //initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, firstWordLength, subQueue, //initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, firstWordLength, subQueue, Loading @@ -420,15 +459,12 @@ void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo // Allocating variable length array on stack // Allocating variable length array on stack unsigned short word[newWordLength]; unsigned short word[newWordLength]; const int firstFreq = getMostFrequentWordLike( firstWordStartPos, firstWordLength, proximityInfo, mWord); if (DEBUG_DICT) { if (DEBUG_DICT) { AKLOGI("First freq: %d", firstFreq); AKLOGI("First freq: %d", firstFreq); } } if (firstFreq <= 0) return; for (int i = 0; i < firstWordLength; ++i) { for (int i = 0; i < firstWordLength; ++i) { word[i] = mWord[i]; word[i] = firstWord[i]; } } const int secondFreq = getMostFrequentWordLike( const int secondFreq = getMostFrequentWordLike( Loading native/src/words_priority_queue.h +29 −0 Original line number Original line Diff line number Diff line Loading @@ -47,6 +47,7 @@ class WordsPriorityQueue { for (int i = 0; i < maxWordLength; ++i) { for (int i = 0; i < maxWordLength; ++i) { mSuggestedWords[i].mUsed = false; mSuggestedWords[i].mUsed = false; } } mHighestSuggestedWord = 0; } } ~WordsPriorityQueue() { ~WordsPriorityQueue() { Loading Loading @@ -79,6 +80,9 @@ class WordsPriorityQueue { DUMP_WORD(word, wordLength); DUMP_WORD(word, wordLength); } } mSuggestions.push(sw); mSuggestions.push(sw); if (!mHighestSuggestedWord || mHighestSuggestedWord->mScore < sw->mScore) { mHighestSuggestedWord = sw; } } } SuggestedWord* top() { SuggestedWord* top() { Loading @@ -88,6 +92,7 @@ class WordsPriorityQueue { } } int outputSuggestions(int *frequencies, unsigned short *outputChars) { int outputSuggestions(int *frequencies, unsigned short *outputChars) { mHighestSuggestedWord = 0; const unsigned int size = min(MAX_WORDS, mSuggestions.size()); const unsigned int size = min(MAX_WORDS, mSuggestions.size()); int index = size - 1; int index = size - 1; while (!mSuggestions.empty() && index >= 0) { while (!mSuggestions.empty() && index >= 0) { Loading Loading @@ -116,6 +121,7 @@ class WordsPriorityQueue { } } void clear() { void clear() { mHighestSuggestedWord = 0; while (!mSuggestions.empty()) { while (!mSuggestions.empty()) { SuggestedWord* sw = mSuggestions.top(); SuggestedWord* sw = mSuggestions.top(); if (DEBUG_WORDS_PRIORITY_QUEUE) { if (DEBUG_WORDS_PRIORITY_QUEUE) { Loading @@ -134,6 +140,28 @@ class WordsPriorityQueue { DUMP_WORD(mSuggestions.top()->mWord, mSuggestions.top()->mWordLength); DUMP_WORD(mSuggestions.top()->mWord, mSuggestions.top()->mWordLength); } } double getHighestNormalizedScore(const unsigned short* before, const int beforeLength, unsigned short** outWord, int *outScore, int *outLength) { if (!mHighestSuggestedWord) { return 0.0; } SuggestedWord* sw = mHighestSuggestedWord; const int score = sw->mScore; unsigned short* word = sw->mWord; const int wordLength = sw->mWordLength; if (outScore) { *outScore = score; } if (outWord) { *outWord = word; } if (outLength) { *outLength = wordLength; } return Correction::RankingAlgorithm::calcNormalizedScore( before, beforeLength, word, wordLength, score); } private: private: struct wordComparator { struct wordComparator { bool operator ()(SuggestedWord * left, SuggestedWord * right) { bool operator ()(SuggestedWord * left, SuggestedWord * right) { Loading @@ -158,6 +186,7 @@ class WordsPriorityQueue { const unsigned int MAX_WORDS; const unsigned int MAX_WORDS; const unsigned int MAX_WORD_LENGTH; const unsigned int MAX_WORD_LENGTH; SuggestedWord* mSuggestedWords; SuggestedWord* mSuggestedWords; SuggestedWord* mHighestSuggestedWord; }; }; } } Loading Loading
native/src/correction.cpp +40 −27 Original line number Original line Diff line number Diff line Loading @@ -39,15 +39,15 @@ inline static void initEditDistance(int *editDistanceTable) { } } } } inline static void dumpEditDistance10ForDebug(int *editDistanceTable, const int inputLength, inline static void dumpEditDistance10ForDebug(int *editDistanceTable, const int outputLength) { const int editDistanceTableWidth, const int outputLength) { if (DEBUG_DICT) { if (DEBUG_DICT) { AKLOGI("EditDistanceTable"); AKLOGI("EditDistanceTable"); for (int i = 0; i <= 10; ++i) { for (int i = 0; i <= 10; ++i) { int c[11]; int c[11]; for (int j = 0; j <= 10; ++j) { for (int j = 0; j <= 10; ++j) { if (j < inputLength + 1 && i < outputLength + 1) { if (j < editDistanceTableWidth + 1 && i < outputLength + 1) { c[j] = (editDistanceTable + i * (inputLength + 1))[j]; c[j] = (editDistanceTable + i * (editDistanceTableWidth + 1))[j]; } else { } else { c[j] = -1; c[j] = -1; } } Loading Loading @@ -81,12 +81,12 @@ inline static void calcEditDistanceOneStep(int *editDistanceTable, const unsigne } } } } inline static int getCurrentEditDistance( inline static int getCurrentEditDistance(int *editDistanceTable, const int editDistanceTableWidth, int *editDistanceTable, const int inputLength, const int outputLength) { const int outputLength, const int inputLength) { if (DEBUG_EDIT_DISTANCE) { if (DEBUG_EDIT_DISTANCE) { AKLOGI("getCurrentEditDistance %d, %d", inputLength, outputLength); AKLOGI("getCurrentEditDistance %d, %d", inputLength, outputLength); } } return editDistanceTable[(inputLength + 1) * (outputLength + 1) - 1]; return editDistanceTable[(editDistanceTableWidth + 1) * (outputLength) + inputLength]; } } ////////////////////// ////////////////////// Loading Loading @@ -165,6 +165,16 @@ int Correction::getFreqForSplitTwoWords(const int firstFreq, const int secondFre } } int Correction::getFinalFreq(const int freq, unsigned short **word, int *wordLength) { int Correction::getFinalFreq(const int freq, unsigned short **word, int *wordLength) { return getFinalFreqInternal(freq, word, wordLength, mInputLength); } int Correction::getFinalFreqForSubQueue(const int freq, unsigned short **word, int *wordLength, const int inputLength) { return getFinalFreqInternal(freq, word, wordLength, inputLength); } int Correction::getFinalFreqInternal(const int freq, unsigned short **word, int *wordLength, const int inputLength) { const int outputIndex = mTerminalOutputIndex; const int outputIndex = mTerminalOutputIndex; const int inputIndex = mTerminalInputIndex; const int inputIndex = mTerminalInputIndex; *wordLength = outputIndex + 1; *wordLength = outputIndex + 1; Loading @@ -173,8 +183,9 @@ int Correction::getFinalFreq(const int freq, unsigned short **word, int *wordLen } } *word = mWord; *word = mWord; return Correction::RankingAlgorithm::calculateFinalFreq( int finalFreq = Correction::RankingAlgorithm::calculateFinalFreq( inputIndex, outputIndex, freq, mEditDistanceTable, this); inputIndex, outputIndex, freq, mEditDistanceTable, this, inputLength); return finalFreq; } } bool Correction::initProcessState(const int outputIndex) { bool Correction::initProcessState(const int outputIndex) { Loading Loading @@ -613,9 +624,9 @@ inline static bool isUpperCase(unsigned short c) { /* static */ /* static */ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const int outputIndex, int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const int outputIndex, const int freq, int* editDistanceTable, const Correction* correction) { const int freq, int* editDistanceTable, const Correction* correction, const int inputLength) { const int excessivePos = correction->getExcessivePos(); const int excessivePos = correction->getExcessivePos(); const int inputLength = correction->mInputLength; const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER; const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER; const int fullWordMultiplier = correction->FULL_WORD_MULTIPLIER; const int fullWordMultiplier = correction->FULL_WORD_MULTIPLIER; const ProximityInfo *proximityInfo = correction->mProximityInfo; const ProximityInfo *proximityInfo = correction->mProximityInfo; Loading @@ -640,13 +651,13 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const const unsigned short* word = correction->mWord; const unsigned short* word = correction->mWord; const bool skipped = skippedCount > 0; const bool skipped = skippedCount > 0; const int quoteDiffCount = max(0, getQuoteCount(word, outputIndex + 1) const int quoteDiffCount = max(0, getQuoteCount(word, outputLength) - getQuoteCount(proximityInfo->getPrimaryInputWord(), inputLength)); - getQuoteCount(proximityInfo->getPrimaryInputWord(), inputLength)); // TODO: Calculate edit distance for transposed and excessive // TODO: Calculate edit distance for transposed and excessive int ed = 0; int ed = 0; if (DEBUG_DICT_FULL) { if (DEBUG_DICT_FULL) { dumpEditDistance10ForDebug(editDistanceTable, inputLength, outputIndex + 1); dumpEditDistance10ForDebug(editDistanceTable, correction->mInputLength, outputLength); } } int adjustedProximityMatchedCount = proximityMatchedCount; int adjustedProximityMatchedCount = proximityMatchedCount; Loading @@ -654,22 +665,22 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const // TODO: Optimize this. // TODO: Optimize this. if (transposedCount > 0 || proximityMatchedCount > 0 || skipped || excessiveCount > 0) { if (transposedCount > 0 || proximityMatchedCount > 0 || skipped || excessiveCount > 0) { ed = getCurrentEditDistance(editDistanceTable, inputLength, outputIndex + 1) ed = getCurrentEditDistance(editDistanceTable, correction->mInputLength, outputLength, - transposedCount; inputLength) - transposedCount; const int matchWeight = powerIntCapped(typedLetterMultiplier, const int matchWeight = powerIntCapped(typedLetterMultiplier, max(inputLength, outputIndex + 1) - ed); max(inputLength, outputLength) - ed); multiplyIntCapped(matchWeight, &finalFreq); multiplyIntCapped(matchWeight, &finalFreq); // TODO: Demote further if there are two or more excessive chars with longer user input? // TODO: Demote further if there are two or more excessive chars with longer user input? if (inputLength > outputIndex + 1) { if (inputLength > outputLength) { multiplyRate(INPUT_EXCEEDS_OUTPUT_DEMOTION_RATE, &finalFreq); multiplyRate(INPUT_EXCEEDS_OUTPUT_DEMOTION_RATE, &finalFreq); } } ed = max(0, ed - quoteDiffCount); ed = max(0, ed - quoteDiffCount); if (transposedCount < 1) { if (transposedCount < 1) { if (ed == 1 && (inputLength == outputIndex || inputLength == outputIndex + 2)) { if (ed == 1 && (inputLength == outputLength - 1 || inputLength == outputLength + 1)) { // Promote a word with just one skipped or excessive char // Promote a word with just one skipped or excessive char if (sameLength) { if (sameLength) { multiplyRate(WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE, &finalFreq); multiplyRate(WORDS_WITH_JUST_ONE_CORRECTION_PROMOTION_RATE, &finalFreq); Loading @@ -681,7 +692,7 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const sameLength = true; sameLength = true; } } } } adjustedProximityMatchedCount = min(max(0, ed - (outputIndex + 1 - inputLength)), adjustedProximityMatchedCount = min(max(0, ed - (outputLength - inputLength)), proximityMatchedCount); proximityMatchedCount); } else { } else { const int matchWeight = powerIntCapped(typedLetterMultiplier, matchCount); const int matchWeight = powerIntCapped(typedLetterMultiplier, matchCount); Loading Loading @@ -783,7 +794,8 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const // Promotion for an exactly matched word // Promotion for an exactly matched word if (ed == 0) { if (ed == 0) { // Full exact match // Full exact match if (sameLength && transposedCount == 0 && !skipped && excessiveCount == 0) { if (sameLength && transposedCount == 0 && !skipped && excessiveCount == 0 && quoteDiffCount == 0) { finalFreq = capped255MultForFullMatchAccentsOrCapitalizationDifference(finalFreq); finalFreq = capped255MultForFullMatchAccentsOrCapitalizationDifference(finalFreq); } } } } Loading Loading @@ -828,14 +840,14 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const } } if (DEBUG_DICT_FULL) { if (DEBUG_DICT_FULL) { AKLOGI("calc: %d, %d", outputIndex, sameLength); AKLOGI("calc: %d, %d", outputLength, sameLength); } } if (DEBUG_CORRECTION_FREQ) { if (DEBUG_CORRECTION_FREQ) { DUMP_WORD(correction->mWord, outputIndex + 1); DUMP_WORD(correction->mWord, outputLength); AKLOGI("FinalFreq: [P%d, S%d, T%d, E%d] %d, %d, %d, %d, %d", proximityMatchedCount, AKLOGI("FinalFreq: [P%d, S%d, T%d, E%d] %d, %d, %d, %d, %d, %d", proximityMatchedCount, skippedCount, transposedCount, excessiveCount, lastCharExceeded, sameLength, skippedCount, transposedCount, excessiveCount, outputLength, lastCharExceeded, quoteDiffCount, ed, finalFreq); sameLength, quoteDiffCount, ed, finalFreq); } } return finalFreq; return finalFreq; Loading Loading @@ -881,11 +893,12 @@ int Correction::RankingAlgorithm::calcFreqForSplitTwoWords( if (firstWordLength == 0 || secondWordLength == 0) { if (firstWordLength == 0 || secondWordLength == 0) { return 0; return 0; } } const int firstDemotionRate = 100 - 100 / (firstWordLength + 1); const int firstDemotionRate = 100 - TWO_WORDS_CORRECTION_DEMOTION_BASE / (firstWordLength + 1); int tempFirstFreq = firstFreq; int tempFirstFreq = firstFreq; multiplyRate(firstDemotionRate, &tempFirstFreq); multiplyRate(firstDemotionRate, &tempFirstFreq); const int secondDemotionRate = 100 - 100 / (secondWordLength + 1); const int secondDemotionRate = 100 - TWO_WORDS_CORRECTION_DEMOTION_BASE / (secondWordLength + 1); int tempSecondFreq = secondFreq; int tempSecondFreq = secondFreq; multiplyRate(secondDemotionRate, &tempSecondFreq); multiplyRate(secondDemotionRate, &tempSecondFreq); Loading
native/src/correction.h +6 −1 Original line number Original line Diff line number Diff line Loading @@ -75,6 +75,8 @@ class Correction { int getFreqForSplitTwoWords( int getFreqForSplitTwoWords( const int firstFreq, const int secondFreq, const unsigned short *word); const int firstFreq, const int secondFreq, const unsigned short *word); int getFinalFreq(const int freq, unsigned short **word, int* wordLength); int getFinalFreq(const int freq, unsigned short **word, int* wordLength); int getFinalFreqForSubQueue(const int freq, unsigned short **word, int* wordLength, const int inputLength); CorrectionType processCharAndCalcState(const int32_t c, const bool isTerminal); CorrectionType processCharAndCalcState(const int32_t c, const bool isTerminal); Loading @@ -97,7 +99,8 @@ class Correction { class RankingAlgorithm { class RankingAlgorithm { public: public: static int calculateFinalFreq(const int inputIndex, const int depth, static int calculateFinalFreq(const int inputIndex, const int depth, const int freq, int *editDistanceTable, const Correction* correction); const int freq, int *editDistanceTable, const Correction* correction, const int inputLength); static int calcFreqForSplitTwoWords(const int firstFreq, const int secondFreq, static int calcFreqForSplitTwoWords(const int firstFreq, const int secondFreq, const Correction* correction, const unsigned short *word); const Correction* correction, const unsigned short *word); static int calcFreqForSplitTwoWordsOld(const int firstFreq, const int secondFreq, static int calcFreqForSplitTwoWordsOld(const int firstFreq, const int secondFreq, Loading @@ -122,6 +125,8 @@ class Correction { const int32_t c, const bool isTerminal, const bool inputIndexIncremented); const int32_t c, const bool isTerminal, const bool inputIndexIncremented); inline CorrectionType processUnrelatedCorrectionType(); inline CorrectionType processUnrelatedCorrectionType(); inline void addCharToCurrentWord(const int32_t c); inline void addCharToCurrentWord(const int32_t c); inline int getFinalFreqInternal(const int freq, unsigned short **word, int* wordLength, const int inputLength); const int TYPED_LETTER_MULTIPLIER; const int TYPED_LETTER_MULTIPLIER; const int FULL_WORD_MULTIPLIER; const int FULL_WORD_MULTIPLIER; Loading
native/src/defines.h +6 −2 Original line number Original line Diff line number Diff line Loading @@ -187,7 +187,7 @@ static void prof_out(void) { // The following "rate"s are used as a multiplier before dividing by 100, so they are in percent. // The following "rate"s are used as a multiplier before dividing by 100, so they are in percent. #define WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE 80 #define WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE 80 #define WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X 12 #define WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X 12 #define WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE 67 #define WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE 58 #define WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE 75 #define WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE 75 #define WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE 75 #define WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE 75 #define WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE 70 #define WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE 70 Loading @@ -199,6 +199,8 @@ static void prof_out(void) { #define INPUT_EXCEEDS_OUTPUT_DEMOTION_RATE 70 #define INPUT_EXCEEDS_OUTPUT_DEMOTION_RATE 70 #define FIRST_CHAR_DIFFERENT_DEMOTION_RATE 96 #define FIRST_CHAR_DIFFERENT_DEMOTION_RATE 96 #define TWO_WORDS_CAPITALIZED_DEMOTION_RATE 50 #define TWO_WORDS_CAPITALIZED_DEMOTION_RATE 50 #define TWO_WORDS_CORRECTION_DEMOTION_BASE 80 #define TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER 1 #define ZERO_DISTANCE_PROMOTION_RATE 110 #define ZERO_DISTANCE_PROMOTION_RATE 110 #define NEUTRAL_SCORE_SQUARED_RADIUS 8.0f #define NEUTRAL_SCORE_SQUARED_RADIUS 8.0f #define HALF_SCORE_SQUARED_RADIUS 32.0f #define HALF_SCORE_SQUARED_RADIUS 32.0f Loading @@ -212,8 +214,10 @@ static void prof_out(void) { // Holds up to 1 candidate for each word // Holds up to 1 candidate for each word #define SUB_QUEUE_MAX_WORDS 1 #define SUB_QUEUE_MAX_WORDS 1 #define SUB_QUEUE_MAX_COUNT 10 #define SUB_QUEUE_MAX_COUNT 10 #define SUB_QUEUE_MIN_WORD_LENGTH 4 #define TWO_WORDS_CORRECTION_THRESHOLD 0.22f #define TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD 0.39 #define START_TWO_WORDS_CORRECTION_THRESHOLD 0.22 #define MAX_DEPTH_MULTIPLIER 3 #define MAX_DEPTH_MULTIPLIER 3 Loading
native/src/unigram_dictionary.cpp +73 −37 Original line number Original line Diff line number Diff line Loading @@ -254,7 +254,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, proximityInfo->getPrimaryInputWord(), i, word, wordLength, score); proximityInfo->getPrimaryInputWord(), i, word, wordLength, score); ns += 0; ns += 0; AKLOGI("--- TOP SUB WORDS for %d --- %d %f [%d]", i, score, ns, AKLOGI("--- TOP SUB WORDS for %d --- %d %f [%d]", i, score, ns, (ns > TWO_WORDS_CORRECTION_THRESHOLD)); (ns > TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD)); DUMP_WORD(proximityInfo->getPrimaryInputWord(), i); DUMP_WORD(proximityInfo->getPrimaryInputWord(), i); DUMP_WORD(word, wordLength); DUMP_WORD(word, wordLength); } } Loading Loading @@ -343,37 +343,27 @@ inline void UnigramDictionary::onTerminal(const int freq, WordsPriorityQueuePool *queuePool, const bool addToMasterQueue) { WordsPriorityQueuePool *queuePool, const bool addToMasterQueue) { const int inputIndex = correction->getInputIndex(); const int inputIndex = correction->getInputIndex(); const bool addToSubQueue = inputIndex < SUB_QUEUE_MAX_COUNT; const bool addToSubQueue = inputIndex < SUB_QUEUE_MAX_COUNT; if (!addToMasterQueue && !addToSubQueue) { return; } WordsPriorityQueue *masterQueue = queuePool->getMasterQueue(); WordsPriorityQueue *subQueue = queuePool->getSubQueue1(inputIndex); int wordLength; int wordLength; unsigned short* wordPointer; unsigned short* wordPointer; if (addToMasterQueue) { WordsPriorityQueue *masterQueue = queuePool->getMasterQueue(); const int finalFreq = correction->getFinalFreq(freq, &wordPointer, &wordLength); const int finalFreq = correction->getFinalFreq(freq, &wordPointer, &wordLength); if (finalFreq != NOT_A_FREQUENCY) { if (finalFreq != NOT_A_FREQUENCY) { if (!terminalAttributes.isShortcutOnly()) { if (!terminalAttributes.isShortcutOnly()) { if (addToMasterQueue) { addWord(wordPointer, wordLength, finalFreq, masterQueue); addWord(wordPointer, wordLength, finalFreq, masterQueue); } } // TODO: Check the validity of "inputIndex == wordLength" //if (addToSubQueue && inputIndex == wordLength) { if (addToSubQueue) { addWord(wordPointer, wordLength, finalFreq, subQueue); } } // Please note that the shortcut candidates will be added to the master queue only. if (!addToMasterQueue) { return; } // From here, below is the code to add shortcut candidates. // Please note that the shortcut candidates will be added to the master queue only. TerminalAttributes::ShortcutIterator iterator = terminalAttributes.getShortcutIterator(); TerminalAttributes::ShortcutIterator iterator = terminalAttributes.getShortcutIterator(); while (iterator.hasNextShortcutTarget()) { while (iterator.hasNextShortcutTarget()) { // TODO: addWord only supports weak ordering, meaning we have no means to control the // TODO: addWord only supports weak ordering, meaning we have no means // order of the shortcuts relative to one another or to the word. We need to either // to control the order of the shortcuts relative to one another or to the word. // modulate the frequency of each shortcut according to its own shortcut frequency or // We need to either modulate the frequency of each shortcut according // to make the queue so that the insert order is protected inside the queue for words // to its own shortcut frequency or to make the queue // so that the insert order is protected inside the queue for words // with the same score. // with the same score. uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL]; uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL]; const int shortcutTargetStringLength = iterator.getNextShortcutTarget( const int shortcutTargetStringLength = iterator.getNextShortcutTarget( Loading @@ -383,6 +373,18 @@ inline void UnigramDictionary::onTerminal(const int freq, } } } } // We only allow two words + other error correction for words with SUB_QUEUE_MIN_WORD_LENGTH // or more length. if (inputIndex >= SUB_QUEUE_MIN_WORD_LENGTH && addToSubQueue) { // TODO: Check the validity of "inputIndex == wordLength" //if (addToSubQueue && inputIndex == wordLength) { WordsPriorityQueue *subQueue = queuePool->getSubQueue1(inputIndex); const int finalFreq = correction->getFinalFreqForSubQueue(freq, &wordPointer, &wordLength, inputIndex); addWord(wordPointer, wordLength, finalFreq, subQueue); } } void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo, void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const int *xcoordinates, const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int inputLength, const int missingSpacePos, const bool useFullEditDistance, const int inputLength, const int missingSpacePos, Loading @@ -397,20 +399,57 @@ void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo } } const bool isSpaceProximity = spaceProximityPos >= 0; const bool isSpaceProximity = spaceProximityPos >= 0; const int firstWordStartPos = 0; const int firstWordStartPos = 0; const int firstTypedWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos; int firstFreq = getMostFrequentWordLike(0, firstTypedWordLength, proximityInfo, mWord); unsigned short* firstWord = 0; int firstWordLength = 0; if (firstFreq > 0) { firstWordLength = firstTypedWordLength; firstWord = mWord; } else { if (masterQueue->size() > 0) { double nsForMaster = masterQueue->getHighestNormalizedScore( proximityInfo->getPrimaryInputWord(), inputLength, 0, 0, 0); if (nsForMaster > START_TWO_WORDS_CORRECTION_THRESHOLD) { // Do nothing if the highest suggestion exceeds the threshold. return; } } WordsPriorityQueue* firstWordQueue = queuePool->getSubQueue1(firstTypedWordLength); if (firstWordQueue->size() < 1) { return; } int score = 0; const double ns = firstWordQueue->getHighestNormalizedScore( proximityInfo->getPrimaryInputWord(), firstTypedWordLength, &firstWord, &score, &firstWordLength); // Two words correction won't be done if the score of the first word doesn't exceed the // threshold. if (ns < TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD) { return; } firstFreq = score >> (firstWordLength + TWO_WORDS_PLUS_OTHER_ERROR_CORRECTION_DEMOTION_DIVIDER); } if (firstFreq <= 0) { return; } const int secondWordStartPos = isSpaceProximity ? (spaceProximityPos + 1) : missingSpacePos; const int secondWordStartPos = isSpaceProximity ? (spaceProximityPos + 1) : missingSpacePos; const int firstWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos; const int secondWordLength = isSpaceProximity const int secondWordLength = isSpaceProximity ? (inputLength - spaceProximityPos - 1) ? (inputLength - spaceProximityPos - 1) : (inputLength - missingSpacePos); : (inputLength - missingSpacePos); if (inputLength >= MAX_WORD_LENGTH) return; if (inputLength >= MAX_WORD_LENGTH) return; if (0 >= firstWordLength || 0 >= secondWordLength || firstWordStartPos >= secondWordStartPos if (0 >= firstWordLength || 0 >= secondWordLength || firstWordStartPos >= secondWordStartPos || firstWordStartPos < 0 || secondWordStartPos + secondWordLength > inputLength) || firstWordStartPos < 0 || secondWordStartPos + secondWordLength > inputLength) return; return; const int newWordLength = firstWordLength + secondWordLength + 1; const int newWordLength = firstWordLength + secondWordLength + 1; // Space proximity preparation // Space proximity preparation //WordsPriorityQueue *subQueue = queuePool->getSubQueue1(); //WordsPriorityQueue *subQueue = queuePool->getSubQueue1(); //initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, firstWordLength, subQueue, //initSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, firstWordLength, subQueue, Loading @@ -420,15 +459,12 @@ void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo // Allocating variable length array on stack // Allocating variable length array on stack unsigned short word[newWordLength]; unsigned short word[newWordLength]; const int firstFreq = getMostFrequentWordLike( firstWordStartPos, firstWordLength, proximityInfo, mWord); if (DEBUG_DICT) { if (DEBUG_DICT) { AKLOGI("First freq: %d", firstFreq); AKLOGI("First freq: %d", firstFreq); } } if (firstFreq <= 0) return; for (int i = 0; i < firstWordLength; ++i) { for (int i = 0; i < firstWordLength; ++i) { word[i] = mWord[i]; word[i] = firstWord[i]; } } const int secondFreq = getMostFrequentWordLike( const int secondFreq = getMostFrequentWordLike( Loading
native/src/words_priority_queue.h +29 −0 Original line number Original line Diff line number Diff line Loading @@ -47,6 +47,7 @@ class WordsPriorityQueue { for (int i = 0; i < maxWordLength; ++i) { for (int i = 0; i < maxWordLength; ++i) { mSuggestedWords[i].mUsed = false; mSuggestedWords[i].mUsed = false; } } mHighestSuggestedWord = 0; } } ~WordsPriorityQueue() { ~WordsPriorityQueue() { Loading Loading @@ -79,6 +80,9 @@ class WordsPriorityQueue { DUMP_WORD(word, wordLength); DUMP_WORD(word, wordLength); } } mSuggestions.push(sw); mSuggestions.push(sw); if (!mHighestSuggestedWord || mHighestSuggestedWord->mScore < sw->mScore) { mHighestSuggestedWord = sw; } } } SuggestedWord* top() { SuggestedWord* top() { Loading @@ -88,6 +92,7 @@ class WordsPriorityQueue { } } int outputSuggestions(int *frequencies, unsigned short *outputChars) { int outputSuggestions(int *frequencies, unsigned short *outputChars) { mHighestSuggestedWord = 0; const unsigned int size = min(MAX_WORDS, mSuggestions.size()); const unsigned int size = min(MAX_WORDS, mSuggestions.size()); int index = size - 1; int index = size - 1; while (!mSuggestions.empty() && index >= 0) { while (!mSuggestions.empty() && index >= 0) { Loading Loading @@ -116,6 +121,7 @@ class WordsPriorityQueue { } } void clear() { void clear() { mHighestSuggestedWord = 0; while (!mSuggestions.empty()) { while (!mSuggestions.empty()) { SuggestedWord* sw = mSuggestions.top(); SuggestedWord* sw = mSuggestions.top(); if (DEBUG_WORDS_PRIORITY_QUEUE) { if (DEBUG_WORDS_PRIORITY_QUEUE) { Loading @@ -134,6 +140,28 @@ class WordsPriorityQueue { DUMP_WORD(mSuggestions.top()->mWord, mSuggestions.top()->mWordLength); DUMP_WORD(mSuggestions.top()->mWord, mSuggestions.top()->mWordLength); } } double getHighestNormalizedScore(const unsigned short* before, const int beforeLength, unsigned short** outWord, int *outScore, int *outLength) { if (!mHighestSuggestedWord) { return 0.0; } SuggestedWord* sw = mHighestSuggestedWord; const int score = sw->mScore; unsigned short* word = sw->mWord; const int wordLength = sw->mWordLength; if (outScore) { *outScore = score; } if (outWord) { *outWord = word; } if (outLength) { *outLength = wordLength; } return Correction::RankingAlgorithm::calcNormalizedScore( before, beforeLength, word, wordLength, score); } private: private: struct wordComparator { struct wordComparator { bool operator ()(SuggestedWord * left, SuggestedWord * right) { bool operator ()(SuggestedWord * left, SuggestedWord * right) { Loading @@ -158,6 +186,7 @@ class WordsPriorityQueue { const unsigned int MAX_WORDS; const unsigned int MAX_WORDS; const unsigned int MAX_WORD_LENGTH; const unsigned int MAX_WORD_LENGTH; SuggestedWord* mSuggestedWords; SuggestedWord* mSuggestedWords; SuggestedWord* mHighestSuggestedWord; }; }; } } Loading