Loading native/src/bigram_dictionary.cpp +1 −2 Original line number Diff line number Diff line Loading @@ -111,8 +111,7 @@ int BigramDictionary::getBigrams(unsigned short *prevWord, int prevWordLength, i mMaxBigrams = maxBigrams; if (HAS_BIGRAM && IS_LATEST_DICT_VERSION) { int pos = mParentDictionary->isValidWordRec( DICTIONARY_HEADER_SIZE, prevWord, 0, prevWordLength); int pos = mParentDictionary->isValidWord(prevWord, prevWordLength); if (DEBUG_DICT) { LOGI("Pos -> %d", pos); } Loading native/src/dictionary.cpp +1 −38 Original line number Diff line number Diff line Loading @@ -53,45 +53,8 @@ bool Dictionary::hasBigram() { return ((mDict[1] & 0xFF) == 1); } // TODO: use uint32_t instead of unsigned short bool Dictionary::isValidWord(unsigned short *word, int length) { if (IS_LATEST_DICT_VERSION) { return (isValidWordRec(DICTIONARY_HEADER_SIZE, word, 0, length) != NOT_VALID_WORD); } else { return (isValidWordRec(0, word, 0, length) != NOT_VALID_WORD); return mUnigramDictionary->isValidWord(word, length); } } int Dictionary::isValidWordRec(int pos, unsigned short *word, int offset, int length) { // returns address of bigram data of that word // return -99 if not found int count = Dictionary::getCount(mDict, &pos); unsigned short currentChar = (unsigned short) word[offset]; for (int j = 0; j < count; j++) { unsigned short c = Dictionary::getChar(mDict, &pos); int terminal = Dictionary::getTerminal(mDict, &pos); int childPos = Dictionary::getAddress(mDict, &pos); if (c == currentChar) { if (offset == length - 1) { if (terminal) { return (pos+1); } } else { if (childPos != 0) { int t = isValidWordRec(childPos, word, offset + 1, length); if (t > 0) { return t; } } } } if (terminal) { Dictionary::getFreq(mDict, IS_LATEST_DICT_VERSION, &pos); } // There could be two instances of each alphabet - upper and lower case. So continue // looking ... } return NOT_VALID_WORD; } } // namespace latinime native/src/dictionary.h +0 −1 Original line number Diff line number Diff line Loading @@ -43,7 +43,6 @@ public: } bool isValidWord(unsigned short *word, int length); int isValidWordRec(int pos, unsigned short *word, int offset, int length); void *getDict() { return (void *)mDict; } int getDictSize() { return mDictSize; } int getMmapFd() { return mMmapFd; } Loading native/src/unigram_dictionary.cpp +45 −3 Original line number Diff line number Diff line Loading @@ -265,8 +265,7 @@ void UnigramDictionary::initSuggestions(const int *codes, const int codesSize, mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2; } void UnigramDictionary::registerNextLetter( unsigned short c, int *nextLetters, int nextLettersSize) { static inline void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize) { if (c < nextLettersSize) { nextLetters[c]++; } Loading Loading @@ -322,7 +321,7 @@ bool UnigramDictionary::addWord(unsigned short *word, int length, int frequency) return false; } unsigned short UnigramDictionary::toBaseLowerCase(unsigned short c) { static inline unsigned short toBaseLowerCase(unsigned short c) { if (c < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0])) { c = BASE_CHARS[c]; } Loading Loading @@ -924,4 +923,47 @@ inline bool UnigramDictionary::processCurrentNodeForExactMatch(const int firstCh return false; } } // TODO: use uint32_t instead of unsigned short bool UnigramDictionary::isValidWord(unsigned short *word, int length) { if (IS_LATEST_DICT_VERSION) { return (isValidWordRec(DICTIONARY_HEADER_SIZE, word, 0, length) != NOT_VALID_WORD); } else { return (isValidWordRec(0, word, 0, length) != NOT_VALID_WORD); } } int UnigramDictionary::isValidWordRec(int pos, unsigned short *word, int offset, int length) { // returns address of bigram data of that word // return -99 if not found int count = Dictionary::getCount(DICT_ROOT, &pos); unsigned short currentChar = (unsigned short) word[offset]; for (int j = 0; j < count; j++) { unsigned short c = Dictionary::getChar(DICT_ROOT, &pos); int terminal = Dictionary::getTerminal(DICT_ROOT, &pos); int childPos = Dictionary::getAddress(DICT_ROOT, &pos); if (c == currentChar) { if (offset == length - 1) { if (terminal) { return (pos+1); } } else { if (childPos != 0) { int t = isValidWordRec(childPos, word, offset + 1, length); if (t > 0) { return t; } } } } if (terminal) { Dictionary::getFreq(DICT_ROOT, IS_LATEST_DICT_VERSION, &pos); } // There could be two instances of each alphabet - upper and lower case. So continue // looking ... } return NOT_VALID_WORD; } } // namespace latinime native/src/unigram_dictionary.h +2 −2 Original line number Diff line number Diff line Loading @@ -39,6 +39,7 @@ public: UnigramDictionary(const uint8_t* const streamStart, int typedLetterMultipler, int fullWordMultiplier, int maxWordLength, int maxWords, int maxProximityChars, const bool isLatestDictVersion); bool isValidWord(unsigned short *word, int length); int getSuggestions(const ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const int codesSize, const int flags, unsigned short *outWords, int *frequencies); Loading @@ -58,6 +59,7 @@ private: void getSuggestionCandidates(const int skipPos, const int excessivePos, const int transposedPos, int *nextLetters, const int nextLettersSize, const int maxDepth); int isValidWordRec(int pos, unsigned short *word, int offset, int length); void getVersionNumber(); bool checkIfDictVersionIsLatest(); int getAddress(int *pos); Loading @@ -65,7 +67,6 @@ private: int wideStrLen(unsigned short *str); bool sameAsTyped(unsigned short *word, int length); bool addWord(unsigned short *word, int length, int frequency); unsigned short toBaseLowerCase(unsigned short c); void getWordsRec(const int childrenCount, const int pos, const int depth, const int maxDepth, const bool traverseAllNodes, const int snr, const int inputIndex, const int diffs, const int skipPos, const int excessivePos, const int transposedPos, int *nextLetters, Loading @@ -79,7 +80,6 @@ private: void getWordsOld(const int initialPos, const int inputLength, const int skipPos, const int excessivePos, const int transposedPos, int *nextLetters, const int nextLettersSize); void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize); int calculateFinalFreq(const int inputIndex, const int depth, const int snr, const int skipPos, const int excessivePos, const int transposedPos, const int freq, const bool sameLength) const; Loading Loading
native/src/bigram_dictionary.cpp +1 −2 Original line number Diff line number Diff line Loading @@ -111,8 +111,7 @@ int BigramDictionary::getBigrams(unsigned short *prevWord, int prevWordLength, i mMaxBigrams = maxBigrams; if (HAS_BIGRAM && IS_LATEST_DICT_VERSION) { int pos = mParentDictionary->isValidWordRec( DICTIONARY_HEADER_SIZE, prevWord, 0, prevWordLength); int pos = mParentDictionary->isValidWord(prevWord, prevWordLength); if (DEBUG_DICT) { LOGI("Pos -> %d", pos); } Loading
native/src/dictionary.cpp +1 −38 Original line number Diff line number Diff line Loading @@ -53,45 +53,8 @@ bool Dictionary::hasBigram() { return ((mDict[1] & 0xFF) == 1); } // TODO: use uint32_t instead of unsigned short bool Dictionary::isValidWord(unsigned short *word, int length) { if (IS_LATEST_DICT_VERSION) { return (isValidWordRec(DICTIONARY_HEADER_SIZE, word, 0, length) != NOT_VALID_WORD); } else { return (isValidWordRec(0, word, 0, length) != NOT_VALID_WORD); return mUnigramDictionary->isValidWord(word, length); } } int Dictionary::isValidWordRec(int pos, unsigned short *word, int offset, int length) { // returns address of bigram data of that word // return -99 if not found int count = Dictionary::getCount(mDict, &pos); unsigned short currentChar = (unsigned short) word[offset]; for (int j = 0; j < count; j++) { unsigned short c = Dictionary::getChar(mDict, &pos); int terminal = Dictionary::getTerminal(mDict, &pos); int childPos = Dictionary::getAddress(mDict, &pos); if (c == currentChar) { if (offset == length - 1) { if (terminal) { return (pos+1); } } else { if (childPos != 0) { int t = isValidWordRec(childPos, word, offset + 1, length); if (t > 0) { return t; } } } } if (terminal) { Dictionary::getFreq(mDict, IS_LATEST_DICT_VERSION, &pos); } // There could be two instances of each alphabet - upper and lower case. So continue // looking ... } return NOT_VALID_WORD; } } // namespace latinime
native/src/dictionary.h +0 −1 Original line number Diff line number Diff line Loading @@ -43,7 +43,6 @@ public: } bool isValidWord(unsigned short *word, int length); int isValidWordRec(int pos, unsigned short *word, int offset, int length); void *getDict() { return (void *)mDict; } int getDictSize() { return mDictSize; } int getMmapFd() { return mMmapFd; } Loading
native/src/unigram_dictionary.cpp +45 −3 Original line number Diff line number Diff line Loading @@ -265,8 +265,7 @@ void UnigramDictionary::initSuggestions(const int *codes, const int codesSize, mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2; } void UnigramDictionary::registerNextLetter( unsigned short c, int *nextLetters, int nextLettersSize) { static inline void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize) { if (c < nextLettersSize) { nextLetters[c]++; } Loading Loading @@ -322,7 +321,7 @@ bool UnigramDictionary::addWord(unsigned short *word, int length, int frequency) return false; } unsigned short UnigramDictionary::toBaseLowerCase(unsigned short c) { static inline unsigned short toBaseLowerCase(unsigned short c) { if (c < sizeof(BASE_CHARS) / sizeof(BASE_CHARS[0])) { c = BASE_CHARS[c]; } Loading Loading @@ -924,4 +923,47 @@ inline bool UnigramDictionary::processCurrentNodeForExactMatch(const int firstCh return false; } } // TODO: use uint32_t instead of unsigned short bool UnigramDictionary::isValidWord(unsigned short *word, int length) { if (IS_LATEST_DICT_VERSION) { return (isValidWordRec(DICTIONARY_HEADER_SIZE, word, 0, length) != NOT_VALID_WORD); } else { return (isValidWordRec(0, word, 0, length) != NOT_VALID_WORD); } } int UnigramDictionary::isValidWordRec(int pos, unsigned short *word, int offset, int length) { // returns address of bigram data of that word // return -99 if not found int count = Dictionary::getCount(DICT_ROOT, &pos); unsigned short currentChar = (unsigned short) word[offset]; for (int j = 0; j < count; j++) { unsigned short c = Dictionary::getChar(DICT_ROOT, &pos); int terminal = Dictionary::getTerminal(DICT_ROOT, &pos); int childPos = Dictionary::getAddress(DICT_ROOT, &pos); if (c == currentChar) { if (offset == length - 1) { if (terminal) { return (pos+1); } } else { if (childPos != 0) { int t = isValidWordRec(childPos, word, offset + 1, length); if (t > 0) { return t; } } } } if (terminal) { Dictionary::getFreq(DICT_ROOT, IS_LATEST_DICT_VERSION, &pos); } // There could be two instances of each alphabet - upper and lower case. So continue // looking ... } return NOT_VALID_WORD; } } // namespace latinime
native/src/unigram_dictionary.h +2 −2 Original line number Diff line number Diff line Loading @@ -39,6 +39,7 @@ public: UnigramDictionary(const uint8_t* const streamStart, int typedLetterMultipler, int fullWordMultiplier, int maxWordLength, int maxWords, int maxProximityChars, const bool isLatestDictVersion); bool isValidWord(unsigned short *word, int length); int getSuggestions(const ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const int codesSize, const int flags, unsigned short *outWords, int *frequencies); Loading @@ -58,6 +59,7 @@ private: void getSuggestionCandidates(const int skipPos, const int excessivePos, const int transposedPos, int *nextLetters, const int nextLettersSize, const int maxDepth); int isValidWordRec(int pos, unsigned short *word, int offset, int length); void getVersionNumber(); bool checkIfDictVersionIsLatest(); int getAddress(int *pos); Loading @@ -65,7 +67,6 @@ private: int wideStrLen(unsigned short *str); bool sameAsTyped(unsigned short *word, int length); bool addWord(unsigned short *word, int length, int frequency); unsigned short toBaseLowerCase(unsigned short c); void getWordsRec(const int childrenCount, const int pos, const int depth, const int maxDepth, const bool traverseAllNodes, const int snr, const int inputIndex, const int diffs, const int skipPos, const int excessivePos, const int transposedPos, int *nextLetters, Loading @@ -79,7 +80,6 @@ private: void getWordsOld(const int initialPos, const int inputLength, const int skipPos, const int excessivePos, const int transposedPos, int *nextLetters, const int nextLettersSize); void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize); int calculateFinalFreq(const int inputIndex, const int depth, const int snr, const int skipPos, const int excessivePos, const int transposedPos, const int freq, const bool sameLength) const; Loading