Loading native/src/correction.cpp +40 −5 Original line number Diff line number Diff line Loading @@ -15,6 +15,7 @@ */ #include <assert.h> #include <ctype.h> #include <stdio.h> #include <string.h> Loading Loading @@ -89,8 +90,10 @@ void Correction::checkState() { } } int Correction::getFreqForSplitTwoWords(const int firstFreq, const int secondFreq) { return Correction::RankingAlgorithm::calcFreqForSplitTwoWords(firstFreq, secondFreq, this); int Correction::getFreqForSplitTwoWords(const int firstFreq, const int secondFreq, const unsigned short *word) { return Correction::RankingAlgorithm::calcFreqForSplitTwoWords( firstFreq, secondFreq, this, word); } int Correction::getFinalFreq(const int freq, unsigned short **word, int *wordLength) { Loading Loading @@ -498,6 +501,16 @@ inline static int getQuoteCount(const unsigned short* word, const int length) { return quoteCount; } inline static bool isUpperCase(unsigned short c) { if (c < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0])) { c = BASE_CHARS[c]; } if (isupper(c)) { return true; } return false; } /* static */ inline static int editDistance( int* editDistanceTable, const unsigned short* input, Loading Loading @@ -749,7 +762,8 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const /* static */ int Correction::RankingAlgorithm::calcFreqForSplitTwoWords( const int firstFreq, const int secondFreq, const Correction* correction) { const int firstFreq, const int secondFreq, const Correction* correction, const unsigned short *word) { const int spaceProximityPos = correction->mSpaceProximityPos; const int missingSpacePos = correction->mMissingSpacePos; if (DEBUG_DICT) { Loading @@ -761,11 +775,27 @@ int Correction::RankingAlgorithm::calcFreqForSplitTwoWords( const bool isSpaceProximity = spaceProximityPos >= 0; const int inputLength = correction->mInputLength; const int firstWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos; const int secondWordLength = isSpaceProximity ? (inputLength - spaceProximityPos - 1) const int secondWordLength = isSpaceProximity ? (inputLength - spaceProximityPos - 1) : (inputLength - missingSpacePos); const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER; bool firstCapitalizedWordDemotion = false; if (firstWordLength >= 2) { firstCapitalizedWordDemotion = isUpperCase(word[0]); } bool secondCapitalizedWordDemotion = false; if (secondWordLength >= 2) { secondCapitalizedWordDemotion = isUpperCase(word[firstWordLength + 1]); } const bool capitalizedWordDemotion = firstCapitalizedWordDemotion ^ secondCapitalizedWordDemotion; if (DEBUG_DICT_FULL) { LOGI("Two words: %c, %c, %d", word[0], word[firstWordLength + 1], capitalizedWordDemotion); } if (firstWordLength == 0 || secondWordLength == 0) { return 0; } Loading Loading @@ -815,6 +845,11 @@ int Correction::RankingAlgorithm::calcFreqForSplitTwoWords( } multiplyRate(WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE, &totalFreq); if (capitalizedWordDemotion) { multiplyRate(TWO_WORDS_CAPITALIZED_DEMOTION_RATE, &totalFreq); } return totalFreq; } Loading native/src/correction.h +3 −2 Original line number Diff line number Diff line Loading @@ -73,7 +73,8 @@ public: bool needsToPrune() const; int getFreqForSplitTwoWords(const int firstFreq, const int secondFreq); int getFreqForSplitTwoWords( const int firstFreq, const int secondFreq, const unsigned short *word); int getFinalFreq(const int freq, unsigned short **word, int* wordLength); CorrectionType processCharAndCalcState(const int32_t c, const bool isTerminal); Loading Loading @@ -151,7 +152,7 @@ private: static int calculateFinalFreq(const int inputIndex, const int depth, const int freq, int *editDistanceTable, const Correction* correction); static int calcFreqForSplitTwoWords(const int firstFreq, const int secondFreq, const Correction* correction); const Correction* correction, const unsigned short *word); }; }; } // namespace latinime Loading native/src/defines.h +1 −0 Original line number Diff line number Diff line Loading @@ -189,6 +189,7 @@ static void dumpWord(const unsigned short* word, const int length) { #define CORRECTION_COUNT_RATE_DEMOTION_RATE_BASE 45 #define INPUT_EXCEEDS_OUTPUT_DEMOTION_RATE 70 #define FIRST_CHAR_DIFFERENT_DEMOTION_RATE 96 #define TWO_WORDS_CAPITALIZED_DEMOTION_RATE 50 // This should be greater than or equal to MAX_WORD_LENGTH defined in BinaryDictionary.java // This is only used for the size of array. Not to be used in c functions. Loading native/src/unigram_dictionary.cpp +1 −1 Original line number Diff line number Diff line Loading @@ -431,7 +431,7 @@ void UnigramDictionary::getSplitTwoWordsSuggestion( word[i] = mWord[i - firstWordLength - 1]; } const int pairFreq = mCorrection->getFreqForSplitTwoWords(firstFreq, secondFreq); const int pairFreq = mCorrection->getFreqForSplitTwoWords(firstFreq, secondFreq, word); if (DEBUG_DICT) { LOGI("Split two words: %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength); } Loading Loading
native/src/correction.cpp +40 −5 Original line number Diff line number Diff line Loading @@ -15,6 +15,7 @@ */ #include <assert.h> #include <ctype.h> #include <stdio.h> #include <string.h> Loading Loading @@ -89,8 +90,10 @@ void Correction::checkState() { } } int Correction::getFreqForSplitTwoWords(const int firstFreq, const int secondFreq) { return Correction::RankingAlgorithm::calcFreqForSplitTwoWords(firstFreq, secondFreq, this); int Correction::getFreqForSplitTwoWords(const int firstFreq, const int secondFreq, const unsigned short *word) { return Correction::RankingAlgorithm::calcFreqForSplitTwoWords( firstFreq, secondFreq, this, word); } int Correction::getFinalFreq(const int freq, unsigned short **word, int *wordLength) { Loading Loading @@ -498,6 +501,16 @@ inline static int getQuoteCount(const unsigned short* word, const int length) { return quoteCount; } inline static bool isUpperCase(unsigned short c) { if (c < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0])) { c = BASE_CHARS[c]; } if (isupper(c)) { return true; } return false; } /* static */ inline static int editDistance( int* editDistanceTable, const unsigned short* input, Loading Loading @@ -749,7 +762,8 @@ int Correction::RankingAlgorithm::calculateFinalFreq(const int inputIndex, const /* static */ int Correction::RankingAlgorithm::calcFreqForSplitTwoWords( const int firstFreq, const int secondFreq, const Correction* correction) { const int firstFreq, const int secondFreq, const Correction* correction, const unsigned short *word) { const int spaceProximityPos = correction->mSpaceProximityPos; const int missingSpacePos = correction->mMissingSpacePos; if (DEBUG_DICT) { Loading @@ -761,11 +775,27 @@ int Correction::RankingAlgorithm::calcFreqForSplitTwoWords( const bool isSpaceProximity = spaceProximityPos >= 0; const int inputLength = correction->mInputLength; const int firstWordLength = isSpaceProximity ? spaceProximityPos : missingSpacePos; const int secondWordLength = isSpaceProximity ? (inputLength - spaceProximityPos - 1) const int secondWordLength = isSpaceProximity ? (inputLength - spaceProximityPos - 1) : (inputLength - missingSpacePos); const int typedLetterMultiplier = correction->TYPED_LETTER_MULTIPLIER; bool firstCapitalizedWordDemotion = false; if (firstWordLength >= 2) { firstCapitalizedWordDemotion = isUpperCase(word[0]); } bool secondCapitalizedWordDemotion = false; if (secondWordLength >= 2) { secondCapitalizedWordDemotion = isUpperCase(word[firstWordLength + 1]); } const bool capitalizedWordDemotion = firstCapitalizedWordDemotion ^ secondCapitalizedWordDemotion; if (DEBUG_DICT_FULL) { LOGI("Two words: %c, %c, %d", word[0], word[firstWordLength + 1], capitalizedWordDemotion); } if (firstWordLength == 0 || secondWordLength == 0) { return 0; } Loading Loading @@ -815,6 +845,11 @@ int Correction::RankingAlgorithm::calcFreqForSplitTwoWords( } multiplyRate(WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE, &totalFreq); if (capitalizedWordDemotion) { multiplyRate(TWO_WORDS_CAPITALIZED_DEMOTION_RATE, &totalFreq); } return totalFreq; } Loading
native/src/correction.h +3 −2 Original line number Diff line number Diff line Loading @@ -73,7 +73,8 @@ public: bool needsToPrune() const; int getFreqForSplitTwoWords(const int firstFreq, const int secondFreq); int getFreqForSplitTwoWords( const int firstFreq, const int secondFreq, const unsigned short *word); int getFinalFreq(const int freq, unsigned short **word, int* wordLength); CorrectionType processCharAndCalcState(const int32_t c, const bool isTerminal); Loading Loading @@ -151,7 +152,7 @@ private: static int calculateFinalFreq(const int inputIndex, const int depth, const int freq, int *editDistanceTable, const Correction* correction); static int calcFreqForSplitTwoWords(const int firstFreq, const int secondFreq, const Correction* correction); const Correction* correction, const unsigned short *word); }; }; } // namespace latinime Loading
native/src/defines.h +1 −0 Original line number Diff line number Diff line Loading @@ -189,6 +189,7 @@ static void dumpWord(const unsigned short* word, const int length) { #define CORRECTION_COUNT_RATE_DEMOTION_RATE_BASE 45 #define INPUT_EXCEEDS_OUTPUT_DEMOTION_RATE 70 #define FIRST_CHAR_DIFFERENT_DEMOTION_RATE 96 #define TWO_WORDS_CAPITALIZED_DEMOTION_RATE 50 // This should be greater than or equal to MAX_WORD_LENGTH defined in BinaryDictionary.java // This is only used for the size of array. Not to be used in c functions. Loading
native/src/unigram_dictionary.cpp +1 −1 Original line number Diff line number Diff line Loading @@ -431,7 +431,7 @@ void UnigramDictionary::getSplitTwoWordsSuggestion( word[i] = mWord[i - firstWordLength - 1]; } const int pairFreq = mCorrection->getFreqForSplitTwoWords(firstFreq, secondFreq); const int pairFreq = mCorrection->getFreqForSplitTwoWords(firstFreq, secondFreq, word); if (DEBUG_DICT) { LOGI("Split two words: %d, %d, %d, %d", firstFreq, secondFreq, pairFreq, inputLength); } Loading