Loading native/src/correction.cpp +72 −32 Original line number Diff line number Diff line Loading @@ -159,10 +159,10 @@ void Correction::checkState() { } } int Correction::getFreqForSplitTwoWords(const int *freqArray, const int *wordLengthArray, const bool isSpaceProximity, const unsigned short *word) { return Correction::RankingAlgorithm::calcFreqForSplitTwoWords(freqArray, wordLengthArray, this, isSpaceProximity, word); int Correction::getFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray, const int wordCount, const bool isSpaceProximity, const unsigned short *word) { return Correction::RankingAlgorithm::calcFreqForSplitMultipleWords(freqArray, wordLengthArray, wordCount, this, isSpaceProximity, word); } int Correction::getFinalFreq(const int freq, unsigned short **word, int *wordLength) { Loading Loading @@ -911,45 +911,85 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const } /* static */ int Correction::RankingAlgorithm::calcFreqForSplitTwoWords( const int *freqArray, const int *wordLengthArray, const Correction* correction, const bool isSpaceProximity, const unsigned short *word) { const int firstFreq = freqArray[0]; const int secondFreq = freqArray[1]; const int firstWordLength = wordLengthArray[0]; const int secondWordLength = wordLengthArray[1]; int Correction::RankingAlgorithm::calcFreqForSplitMultipleWords( const int *freqArray, const int *wordLengthArray, const int wordCount, const Correction* correction, const bool isSpaceProximity, const unsigned short *word) { const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER; bool firstCapitalizedWordDemotion = false; bool secondCapitalizedWordDemotion = false; { // TODO: Handle multiple capitalized word demotion properly const int firstWordLength = wordLengthArray[0]; const int secondWordLength = wordLengthArray[1]; if (firstWordLength >= 2) { firstCapitalizedWordDemotion = isUpperCase(word[0]); } bool secondCapitalizedWordDemotion = false; if (secondWordLength >= 2) { // FIXME: word[firstWordLength + 1] is incorrect. secondCapitalizedWordDemotion = isUpperCase(word[firstWordLength + 1]); } } const bool capitalizedWordDemotion = firstCapitalizedWordDemotion ^ secondCapitalizedWordDemotion; if (firstWordLength == 0 || secondWordLength == 0) { int totalLength = 0; int totalFreq = 0; for (int i = 0; i < wordCount; ++i){ const int wordLength = wordLengthArray[i]; if (wordLength <= 0) { return 0; } const int firstDemotionRate = 100 - TWO_WORDS_CORRECTION_DEMOTION_BASE / (firstWordLength + 1); int tempFirstFreq = firstFreq; multiplyRate(firstDemotionRate, &tempFirstFreq); const int secondDemotionRate = 100 - TWO_WORDS_CORRECTION_DEMOTION_BASE / (secondWordLength + 1); int tempSecondFreq = secondFreq; multiplyRate(secondDemotionRate, &tempSecondFreq); totalLength += wordLength; const int demotionRate = 100 - TWO_WORDS_CORRECTION_DEMOTION_BASE / (wordLength + 1); int tempFirstFreq = freqArray[i]; multiplyRate(demotionRate, &tempFirstFreq); totalFreq += tempFirstFreq; } const int totalLength = firstWordLength + secondWordLength; if (totalLength <= 0 || totalFreq <= 0) { return 0; } // TODO: Currently totalFreq is adjusted to two word metrix. // Promote pairFreq with multiplying by 2, because the word length is the same as the typed // length. int totalFreq = tempFirstFreq + tempSecondFreq; totalFreq = totalFreq * 2 / wordCount; if (wordCount > 2) { // Safety net for 3+ words -- Caveats: many heuristics and workarounds here. int oneLengthCounter = 0; int twoLengthCounter = 0; for (int i = 0; i < wordCount; ++i) { const int wordLength = wordLengthArray[i]; // TODO: Use bigram instead of this safety net if (i < wordCount - 1) { const int nextWordLength = wordLengthArray[i + 1]; if (wordLength == 1 && nextWordLength == 2) { // Safety net to filter 1 length and 2 length sequential words return 0; } } const int freq = freqArray[i]; // Demote too short weak words if (wordLength <= 4 && freq <= MAX_FREQ * 2 / 3 /* heuristic... */) { multiplyRate(100 * freq / MAX_FREQ, &totalFreq); } if (wordLength == 1) { ++oneLengthCounter; } else if (wordLength == 2) { ++twoLengthCounter; } if (oneLengthCounter >= 2 || (oneLengthCounter + twoLengthCounter) >= 4) { // Safety net to filter too many short words return 0; } } multiplyRate(MULTIPLE_WORDS_DEMOTION_RATE, &totalFreq); } // This is a workaround to try offsetting the not-enough-demotion which will be done in // calcNormalizedScore in Utils.java. Loading Loading @@ -993,9 +1033,9 @@ int Correction::RankingAlgorithm::calcFreqForSplitTwoWords( } if (DEBUG_CORRECTION_FREQ) { AKLOGI("Two words (%d, %d) (%d, %d) %d, %d", firstFreq, secondFreq, firstWordLength, secondWordLength, capitalizedWordDemotion, totalFreq); DUMP_WORD(word, firstWordLength); AKLOGI("Multiple words (%d, %d) (%d, %d) %d, %d", freqArray[0], freqArray[1], wordLengthArray[0], wordLengthArray[1], capitalizedWordDemotion, totalFreq); DUMP_WORD(word, wordLengthArray[0]); } return totalFreq; Loading native/src/correction.h +5 −5 Original line number Diff line number Diff line Loading @@ -121,9 +121,9 @@ class Correction { bool needsToPrune() const; int getFreqForSplitTwoWords( const int *freqArray, const int *wordLengthArray, const bool isSpaceProximity, const unsigned short *word); int getFreqForSplitMultipleWords( const int *freqArray, const int *wordLengthArray, const int wordCount, const bool isSpaceProximity, const unsigned short *word); int getFinalFreq(const int freq, unsigned short **word, int* wordLength); int getFinalFreqForSubQueue(const int freq, unsigned short **word, int* wordLength, const int inputLength); Loading Loading @@ -151,8 +151,8 @@ class Correction { static int calculateFinalFreq(const int inputIndex, const int depth, const int freq, int *editDistanceTable, const Correction* correction, const int inputLength); static int calcFreqForSplitTwoWords(const int *freqArray, const int *wordLengthArray, const Correction* correction, const bool isSpaceProximity, static int calcFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray, const int wordCount, const Correction* correction, const bool isSpaceProximity, const unsigned short *word); static double calcNormalizedScore(const unsigned short* before, const int beforeLength, const unsigned short* after, const int afterLength, const int score); Loading native/src/defines.h +5 −3 Original line number Diff line number Diff line Loading @@ -208,6 +208,7 @@ static void prof_out(void) { #define ZERO_DISTANCE_PROMOTION_RATE 110 #define NEUTRAL_SCORE_SQUARED_RADIUS 8.0f #define HALF_SCORE_SQUARED_RADIUS 32.0f #define MAX_FREQ 255 // This must be greater than or equal to MAX_WORD_LENGTH defined in BinaryDictionary.java // This is only used for the size of array. Not to be used in c functions. Loading @@ -222,7 +223,9 @@ static void prof_out(void) { #define SUB_QUEUE_MAX_WORDS 1 #define SUB_QUEUE_MAX_COUNT 10 #define SUB_QUEUE_MIN_WORD_LENGTH 4 #define MULTIPLE_WORDS_SUGGESTION_MAX_WORDS 2 #define MULTIPLE_WORDS_SUGGESTION_MAX_WORDS 10 #define MULTIPLE_WORDS_DEMOTION_RATE 80 #define MIN_INPUT_LENGTH_FOR_THREE_OR_MORE_WORDS_CORRECTION 6 #define TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD 0.39 #define START_TWO_WORDS_CORRECTION_THRESHOLD 0.22 Loading @@ -230,7 +233,6 @@ static void prof_out(void) { #define MAX_DEPTH_MULTIPLIER 3 #define FIRST_WORD_INDEX 0 #define SECOND_WORD_INDEX 1 // TODO: Reduce this constant if possible; check the maximum number of umlauts in the same German // word in the dictionary Loading @@ -248,7 +250,7 @@ template<typename T> inline T max(T a, T b) { return a > b ? a : b; } #define NEUTRAL_AREA_RADIUS_RATIO 1.3f // DEBUG #define INPUTLENGTH_FOR_DEBUG 10 #define INPUTLENGTH_FOR_DEBUG -1 #define MIN_OUTPUT_INDEX_FOR_DEBUG -1 #endif // LATINIME_DEFINES_H native/src/unigram_dictionary.cpp +42 −23 Original line number Diff line number Diff line Loading @@ -224,7 +224,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, // Multiple word suggestions if (SUGGEST_MULTIPLE_WORDS && inputLength >= MIN_USER_TYPED_LENGTH_FOR_MULTIPLE_WORD_SUGGESTION) { getSplitTwoWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, getSplitMultipleWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, useFullEditDistance, inputLength, correction, queuePool, hasAutoCorrectionCandidate); } Loading Loading @@ -445,17 +445,18 @@ bool UnigramDictionary::getSubStringSuggestion( if (outputWordStartPos + nextWordLength >= MAX_WORD_LENGTH) { return false; } outputWord[outputWordStartPos + tempOutputWordLength] = SPACE; outputWord[tempOutputWordLength] = SPACE; if (outputWordLength) { ++*outputWordLength; } } else if (currentWordIndex >= 1) { // TODO: Handle 3 or more words const int pairFreq = correction->getFreqForSplitTwoWords( freqArray, wordLengthArray, isSpaceProximity, outputWord); const int pairFreq = correction->getFreqForSplitMultipleWords( freqArray, wordLengthArray, currentWordIndex + 1, isSpaceProximity, outputWord); if (DEBUG_DICT) { AKLOGI("Split two words: %d, %d, %d, %d, (%d)", freqArray[0], freqArray[1], pairFreq, inputLength, wordLengthArray[0]); DUMP_WORD(outputWord, tempOutputWordLength); AKLOGI("Split two words: %d, %d, %d, %d, (%d) %d", freqArray[0], freqArray[1], pairFreq, inputLength, wordLengthArray[0], tempOutputWordLength); } addWord(outputWord, tempOutputWordLength, pairFreq, queuePool->getMasterQueue()); } Loading @@ -473,30 +474,46 @@ void UnigramDictionary::getMultiWordsSuggestionRec(ProximityInfo *proximityInfo, // Return if the last word index return; } for (int i = 1; i < inputLength; ++i) { int tempOutputWordLength = 0; // First word int inputWordStartPos = 0; int inputWordLength = i; if (startWordIndex >= 1 && (hasAutoCorrectionCandidate || inputLength < MIN_INPUT_LENGTH_FOR_THREE_OR_MORE_WORDS_CORRECTION)) { // Do not suggest 3+ words if already has auto correction candidate return; } for (int i = startInputPos + 1; i < inputLength; ++i) { if (DEBUG_CORRECTION_FREQ) { AKLOGI("Two words, %d", inputWordLength); AKLOGI("Multi words(%d), start in %d sep %d start out %d", startWordIndex, startInputPos, i, outputWordLength); DUMP_WORD(outputWord, outputWordLength); } int tempOutputWordLength = 0; // Current word int inputWordStartPos = startInputPos; int inputWordLength = i - startInputPos; if (!getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes, useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate, FIRST_WORD_INDEX, inputWordStartPos, inputWordLength, 0, true /* not used */, freqArray, wordLengthArray, outputWord, &tempOutputWordLength)) { startWordIndex, inputWordStartPos, inputWordLength, outputWordLength, true /* not used */, freqArray, wordLengthArray, outputWord, &tempOutputWordLength)) { continue; } // Second word if (DEBUG_CORRECTION_FREQ) { AKLOGI("Do missing space correction"); } // Next word // Missing space inputWordStartPos = i; inputWordLength = inputLength - i; getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes, if(!getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes, useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate, SECOND_WORD_INDEX, inputWordStartPos, inputWordLength, tempOutputWordLength, false /* missing space */, freqArray, wordLengthArray, outputWord, 0); startWordIndex + 1, inputWordStartPos, inputWordLength, tempOutputWordLength, false /* missing space */, freqArray, wordLengthArray, outputWord, 0)) { getMultiWordsSuggestionRec(proximityInfo, xcoordinates, ycoordinates, codes, useFullEditDistance, inputLength, correction, queuePool, hasAutoCorrectionCandidate, inputWordStartPos, startWordIndex + 1, tempOutputWordLength, freqArray, wordLengthArray, outputWord); } // Mistyped space ++inputWordStartPos; Loading @@ -512,15 +529,17 @@ void UnigramDictionary::getMultiWordsSuggestionRec(ProximityInfo *proximityInfo, continue; } if (DEBUG_CORRECTION_FREQ) { AKLOGI("Do mistyped space correction"); } getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes, useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate, SECOND_WORD_INDEX, inputWordStartPos, inputWordLength, tempOutputWordLength, true /* mistyped space */, freqArray, wordLengthArray, outputWord, 0); startWordIndex + 1, inputWordStartPos, inputWordLength, tempOutputWordLength, true /* mistyped space */, freqArray, wordLengthArray, outputWord, 0); } } void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo, void UnigramDictionary::getSplitMultipleWordsSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int inputLength, Correction *correction, WordsPriorityQueuePool* queuePool, Loading native/src/unigram_dictionary.h +1 −1 Original line number Diff line number Diff line Loading @@ -101,7 +101,7 @@ class UnigramDictionary { const bool useFullEditDistance, const int inputLength, Correction *correction, WordsPriorityQueuePool* queuePool, const bool doAutoCompletion, const int maxErrors, const int currentWordIndex); void getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo, void getSplitMultipleWordsSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int inputLength, Correction *correction, WordsPriorityQueuePool* queuePool, Loading Loading
native/src/correction.cpp +72 −32 Original line number Diff line number Diff line Loading @@ -159,10 +159,10 @@ void Correction::checkState() { } } int Correction::getFreqForSplitTwoWords(const int *freqArray, const int *wordLengthArray, const bool isSpaceProximity, const unsigned short *word) { return Correction::RankingAlgorithm::calcFreqForSplitTwoWords(freqArray, wordLengthArray, this, isSpaceProximity, word); int Correction::getFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray, const int wordCount, const bool isSpaceProximity, const unsigned short *word) { return Correction::RankingAlgorithm::calcFreqForSplitMultipleWords(freqArray, wordLengthArray, wordCount, this, isSpaceProximity, word); } int Correction::getFinalFreq(const int freq, unsigned short **word, int *wordLength) { Loading Loading @@ -911,45 +911,85 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const } /* static */ int Correction::RankingAlgorithm::calcFreqForSplitTwoWords( const int *freqArray, const int *wordLengthArray, const Correction* correction, const bool isSpaceProximity, const unsigned short *word) { const int firstFreq = freqArray[0]; const int secondFreq = freqArray[1]; const int firstWordLength = wordLengthArray[0]; const int secondWordLength = wordLengthArray[1]; int Correction::RankingAlgorithm::calcFreqForSplitMultipleWords( const int *freqArray, const int *wordLengthArray, const int wordCount, const Correction* correction, const bool isSpaceProximity, const unsigned short *word) { const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER; bool firstCapitalizedWordDemotion = false; bool secondCapitalizedWordDemotion = false; { // TODO: Handle multiple capitalized word demotion properly const int firstWordLength = wordLengthArray[0]; const int secondWordLength = wordLengthArray[1]; if (firstWordLength >= 2) { firstCapitalizedWordDemotion = isUpperCase(word[0]); } bool secondCapitalizedWordDemotion = false; if (secondWordLength >= 2) { // FIXME: word[firstWordLength + 1] is incorrect. secondCapitalizedWordDemotion = isUpperCase(word[firstWordLength + 1]); } } const bool capitalizedWordDemotion = firstCapitalizedWordDemotion ^ secondCapitalizedWordDemotion; if (firstWordLength == 0 || secondWordLength == 0) { int totalLength = 0; int totalFreq = 0; for (int i = 0; i < wordCount; ++i){ const int wordLength = wordLengthArray[i]; if (wordLength <= 0) { return 0; } const int firstDemotionRate = 100 - TWO_WORDS_CORRECTION_DEMOTION_BASE / (firstWordLength + 1); int tempFirstFreq = firstFreq; multiplyRate(firstDemotionRate, &tempFirstFreq); const int secondDemotionRate = 100 - TWO_WORDS_CORRECTION_DEMOTION_BASE / (secondWordLength + 1); int tempSecondFreq = secondFreq; multiplyRate(secondDemotionRate, &tempSecondFreq); totalLength += wordLength; const int demotionRate = 100 - TWO_WORDS_CORRECTION_DEMOTION_BASE / (wordLength + 1); int tempFirstFreq = freqArray[i]; multiplyRate(demotionRate, &tempFirstFreq); totalFreq += tempFirstFreq; } const int totalLength = firstWordLength + secondWordLength; if (totalLength <= 0 || totalFreq <= 0) { return 0; } // TODO: Currently totalFreq is adjusted to two word metrix. // Promote pairFreq with multiplying by 2, because the word length is the same as the typed // length. int totalFreq = tempFirstFreq + tempSecondFreq; totalFreq = totalFreq * 2 / wordCount; if (wordCount > 2) { // Safety net for 3+ words -- Caveats: many heuristics and workarounds here. int oneLengthCounter = 0; int twoLengthCounter = 0; for (int i = 0; i < wordCount; ++i) { const int wordLength = wordLengthArray[i]; // TODO: Use bigram instead of this safety net if (i < wordCount - 1) { const int nextWordLength = wordLengthArray[i + 1]; if (wordLength == 1 && nextWordLength == 2) { // Safety net to filter 1 length and 2 length sequential words return 0; } } const int freq = freqArray[i]; // Demote too short weak words if (wordLength <= 4 && freq <= MAX_FREQ * 2 / 3 /* heuristic... */) { multiplyRate(100 * freq / MAX_FREQ, &totalFreq); } if (wordLength == 1) { ++oneLengthCounter; } else if (wordLength == 2) { ++twoLengthCounter; } if (oneLengthCounter >= 2 || (oneLengthCounter + twoLengthCounter) >= 4) { // Safety net to filter too many short words return 0; } } multiplyRate(MULTIPLE_WORDS_DEMOTION_RATE, &totalFreq); } // This is a workaround to try offsetting the not-enough-demotion which will be done in // calcNormalizedScore in Utils.java. Loading Loading @@ -993,9 +1033,9 @@ int Correction::RankingAlgorithm::calcFreqForSplitTwoWords( } if (DEBUG_CORRECTION_FREQ) { AKLOGI("Two words (%d, %d) (%d, %d) %d, %d", firstFreq, secondFreq, firstWordLength, secondWordLength, capitalizedWordDemotion, totalFreq); DUMP_WORD(word, firstWordLength); AKLOGI("Multiple words (%d, %d) (%d, %d) %d, %d", freqArray[0], freqArray[1], wordLengthArray[0], wordLengthArray[1], capitalizedWordDemotion, totalFreq); DUMP_WORD(word, wordLengthArray[0]); } return totalFreq; Loading
native/src/correction.h +5 −5 Original line number Diff line number Diff line Loading @@ -121,9 +121,9 @@ class Correction { bool needsToPrune() const; int getFreqForSplitTwoWords( const int *freqArray, const int *wordLengthArray, const bool isSpaceProximity, const unsigned short *word); int getFreqForSplitMultipleWords( const int *freqArray, const int *wordLengthArray, const int wordCount, const bool isSpaceProximity, const unsigned short *word); int getFinalFreq(const int freq, unsigned short **word, int* wordLength); int getFinalFreqForSubQueue(const int freq, unsigned short **word, int* wordLength, const int inputLength); Loading Loading @@ -151,8 +151,8 @@ class Correction { static int calculateFinalFreq(const int inputIndex, const int depth, const int freq, int *editDistanceTable, const Correction* correction, const int inputLength); static int calcFreqForSplitTwoWords(const int *freqArray, const int *wordLengthArray, const Correction* correction, const bool isSpaceProximity, static int calcFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray, const int wordCount, const Correction* correction, const bool isSpaceProximity, const unsigned short *word); static double calcNormalizedScore(const unsigned short* before, const int beforeLength, const unsigned short* after, const int afterLength, const int score); Loading
native/src/defines.h +5 −3 Original line number Diff line number Diff line Loading @@ -208,6 +208,7 @@ static void prof_out(void) { #define ZERO_DISTANCE_PROMOTION_RATE 110 #define NEUTRAL_SCORE_SQUARED_RADIUS 8.0f #define HALF_SCORE_SQUARED_RADIUS 32.0f #define MAX_FREQ 255 // This must be greater than or equal to MAX_WORD_LENGTH defined in BinaryDictionary.java // This is only used for the size of array. Not to be used in c functions. Loading @@ -222,7 +223,9 @@ static void prof_out(void) { #define SUB_QUEUE_MAX_WORDS 1 #define SUB_QUEUE_MAX_COUNT 10 #define SUB_QUEUE_MIN_WORD_LENGTH 4 #define MULTIPLE_WORDS_SUGGESTION_MAX_WORDS 2 #define MULTIPLE_WORDS_SUGGESTION_MAX_WORDS 10 #define MULTIPLE_WORDS_DEMOTION_RATE 80 #define MIN_INPUT_LENGTH_FOR_THREE_OR_MORE_WORDS_CORRECTION 6 #define TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD 0.39 #define START_TWO_WORDS_CORRECTION_THRESHOLD 0.22 Loading @@ -230,7 +233,6 @@ static void prof_out(void) { #define MAX_DEPTH_MULTIPLIER 3 #define FIRST_WORD_INDEX 0 #define SECOND_WORD_INDEX 1 // TODO: Reduce this constant if possible; check the maximum number of umlauts in the same German // word in the dictionary Loading @@ -248,7 +250,7 @@ template<typename T> inline T max(T a, T b) { return a > b ? a : b; } #define NEUTRAL_AREA_RADIUS_RATIO 1.3f // DEBUG #define INPUTLENGTH_FOR_DEBUG 10 #define INPUTLENGTH_FOR_DEBUG -1 #define MIN_OUTPUT_INDEX_FOR_DEBUG -1 #endif // LATINIME_DEFINES_H
native/src/unigram_dictionary.cpp +42 −23 Original line number Diff line number Diff line Loading @@ -224,7 +224,7 @@ void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, // Multiple word suggestions if (SUGGEST_MULTIPLE_WORDS && inputLength >= MIN_USER_TYPED_LENGTH_FOR_MULTIPLE_WORD_SUGGESTION) { getSplitTwoWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, getSplitMultipleWordsSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, useFullEditDistance, inputLength, correction, queuePool, hasAutoCorrectionCandidate); } Loading Loading @@ -445,17 +445,18 @@ bool UnigramDictionary::getSubStringSuggestion( if (outputWordStartPos + nextWordLength >= MAX_WORD_LENGTH) { return false; } outputWord[outputWordStartPos + tempOutputWordLength] = SPACE; outputWord[tempOutputWordLength] = SPACE; if (outputWordLength) { ++*outputWordLength; } } else if (currentWordIndex >= 1) { // TODO: Handle 3 or more words const int pairFreq = correction->getFreqForSplitTwoWords( freqArray, wordLengthArray, isSpaceProximity, outputWord); const int pairFreq = correction->getFreqForSplitMultipleWords( freqArray, wordLengthArray, currentWordIndex + 1, isSpaceProximity, outputWord); if (DEBUG_DICT) { AKLOGI("Split two words: %d, %d, %d, %d, (%d)", freqArray[0], freqArray[1], pairFreq, inputLength, wordLengthArray[0]); DUMP_WORD(outputWord, tempOutputWordLength); AKLOGI("Split two words: %d, %d, %d, %d, (%d) %d", freqArray[0], freqArray[1], pairFreq, inputLength, wordLengthArray[0], tempOutputWordLength); } addWord(outputWord, tempOutputWordLength, pairFreq, queuePool->getMasterQueue()); } Loading @@ -473,30 +474,46 @@ void UnigramDictionary::getMultiWordsSuggestionRec(ProximityInfo *proximityInfo, // Return if the last word index return; } for (int i = 1; i < inputLength; ++i) { int tempOutputWordLength = 0; // First word int inputWordStartPos = 0; int inputWordLength = i; if (startWordIndex >= 1 && (hasAutoCorrectionCandidate || inputLength < MIN_INPUT_LENGTH_FOR_THREE_OR_MORE_WORDS_CORRECTION)) { // Do not suggest 3+ words if already has auto correction candidate return; } for (int i = startInputPos + 1; i < inputLength; ++i) { if (DEBUG_CORRECTION_FREQ) { AKLOGI("Two words, %d", inputWordLength); AKLOGI("Multi words(%d), start in %d sep %d start out %d", startWordIndex, startInputPos, i, outputWordLength); DUMP_WORD(outputWord, outputWordLength); } int tempOutputWordLength = 0; // Current word int inputWordStartPos = startInputPos; int inputWordLength = i - startInputPos; if (!getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes, useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate, FIRST_WORD_INDEX, inputWordStartPos, inputWordLength, 0, true /* not used */, freqArray, wordLengthArray, outputWord, &tempOutputWordLength)) { startWordIndex, inputWordStartPos, inputWordLength, outputWordLength, true /* not used */, freqArray, wordLengthArray, outputWord, &tempOutputWordLength)) { continue; } // Second word if (DEBUG_CORRECTION_FREQ) { AKLOGI("Do missing space correction"); } // Next word // Missing space inputWordStartPos = i; inputWordLength = inputLength - i; getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes, if(!getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes, useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate, SECOND_WORD_INDEX, inputWordStartPos, inputWordLength, tempOutputWordLength, false /* missing space */, freqArray, wordLengthArray, outputWord, 0); startWordIndex + 1, inputWordStartPos, inputWordLength, tempOutputWordLength, false /* missing space */, freqArray, wordLengthArray, outputWord, 0)) { getMultiWordsSuggestionRec(proximityInfo, xcoordinates, ycoordinates, codes, useFullEditDistance, inputLength, correction, queuePool, hasAutoCorrectionCandidate, inputWordStartPos, startWordIndex + 1, tempOutputWordLength, freqArray, wordLengthArray, outputWord); } // Mistyped space ++inputWordStartPos; Loading @@ -512,15 +529,17 @@ void UnigramDictionary::getMultiWordsSuggestionRec(ProximityInfo *proximityInfo, continue; } if (DEBUG_CORRECTION_FREQ) { AKLOGI("Do mistyped space correction"); } getSubStringSuggestion(proximityInfo, xcoordinates, ycoordinates, codes, useFullEditDistance, correction, queuePool, inputLength, hasAutoCorrectionCandidate, SECOND_WORD_INDEX, inputWordStartPos, inputWordLength, tempOutputWordLength, true /* mistyped space */, freqArray, wordLengthArray, outputWord, 0); startWordIndex + 1, inputWordStartPos, inputWordLength, tempOutputWordLength, true /* mistyped space */, freqArray, wordLengthArray, outputWord, 0); } } void UnigramDictionary::getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo, void UnigramDictionary::getSplitMultipleWordsSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int inputLength, Correction *correction, WordsPriorityQueuePool* queuePool, Loading
native/src/unigram_dictionary.h +1 −1 Original line number Diff line number Diff line Loading @@ -101,7 +101,7 @@ class UnigramDictionary { const bool useFullEditDistance, const int inputLength, Correction *correction, WordsPriorityQueuePool* queuePool, const bool doAutoCompletion, const int maxErrors, const int currentWordIndex); void getSplitTwoWordsSuggestions(ProximityInfo *proximityInfo, void getSplitMultipleWordsSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const bool useFullEditDistance, const int inputLength, Correction *correction, WordsPriorityQueuePool* queuePool, Loading