Loading native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp +5 −5 Original line number Diff line number Diff line Loading @@ -68,9 +68,9 @@ static jlong latinime_BinaryDictionary_open(JNIEnv *env, jobject object, return 0; } int pagesize = getpagesize(); adjust = dictOffset % pagesize; int adjDictOffset = dictOffset - adjust; int adjDictSize = dictSize + adjust; adjust = static_cast<int>(dictOffset) % pagesize; int adjDictOffset = static_cast<int>(dictOffset) - adjust; int adjDictSize = static_cast<int>(dictSize) + adjust; dictBuf = mmap(0, sizeof(char) * adjDictSize, PROT_READ, MAP_PRIVATE, fd, adjDictOffset); if (dictBuf == MAP_FAILED) { AKLOGE("DICT: Can't mmap dictionary. errno=%d", errno); Loading Loading @@ -120,8 +120,8 @@ static jlong latinime_BinaryDictionary_open(JNIEnv *env, jobject object, releaseDictBuf(dictBuf, 0, 0); #endif // USE_MMAP_FOR_DICTIONARY } else { dictionary = new Dictionary(dictBuf, dictSize, fd, adjust, typedLetterMultiplier, fullWordMultiplier, maxWordLength, maxWords, maxPredictions); dictionary = new Dictionary(dictBuf, static_cast<int>(dictSize), fd, adjust, typedLetterMultiplier, fullWordMultiplier, maxWordLength, maxWords, maxPredictions); } PROF_END(66); PROF_CLOSE; Loading native/jni/src/basechars.cpp +3 −1 Original line number Diff line number Diff line Loading @@ -14,6 +14,8 @@ * limitations under the License. */ #include <stdint.h> #include "char_utils.h" namespace latinime { Loading @@ -24,7 +26,7 @@ namespace latinime { * if c is not a combined character, or the base character if it * is combined. */ const unsigned short BASE_CHARS[BASE_CHARS_SIZE] = { const uint16_t BASE_CHARS[BASE_CHARS_SIZE] = { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, Loading native/jni/src/bigram_dictionary.cpp +1 −1 Original line number Diff line number Diff line Loading @@ -156,7 +156,7 @@ int BigramDictionary::getBigramListPositionForWord(const int32_t *prevWord, const int flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos); if (0 == (flags & BinaryFormat::FLAG_HAS_BIGRAMS)) return 0; if (0 == (flags & BinaryFormat::FLAG_HAS_MULTIPLE_CHARS)) { BinaryFormat::getCharCodeAndForwardPointer(root, &pos); BinaryFormat::getCodePointAndForwardPointer(root, &pos); } else { pos = BinaryFormat::skipOtherCharacters(root, pos); } Loading native/jni/src/binary_format.h +21 −21 Original line number Diff line number Diff line Loading @@ -84,7 +84,7 @@ class BinaryFormat { static unsigned int getFlags(const uint8_t *const dict); static int getGroupCountAndForwardPointer(const uint8_t *const dict, int *pos); static uint8_t getFlagsAndForwardPointer(const uint8_t *const dict, int *pos); static int32_t getCharCodeAndForwardPointer(const uint8_t *const dict, int *pos); static int32_t getCodePointAndForwardPointer(const uint8_t *const dict, int *pos); static int readFrequencyWithoutMovingPointer(const uint8_t *const dict, const int pos); static int skipOtherCharacters(const uint8_t *const dict, const int pos); static int skipChildrenPosition(const uint8_t flags, const int pos); Loading Loading @@ -176,22 +176,22 @@ inline uint8_t BinaryFormat::getFlagsAndForwardPointer(const uint8_t *const dict return dict[(*pos)++]; } inline int32_t BinaryFormat::getCharCodeAndForwardPointer(const uint8_t *const dict, int *pos) { inline int32_t BinaryFormat::getCodePointAndForwardPointer(const uint8_t *const dict, int *pos) { const int origin = *pos; const int32_t character = dict[origin]; if (character < MINIMAL_ONE_BYTE_CHARACTER_VALUE) { if (character == CHARACTER_ARRAY_TERMINATOR) { const int32_t codePoint = dict[origin]; if (codePoint < MINIMAL_ONE_BYTE_CHARACTER_VALUE) { if (codePoint == CHARACTER_ARRAY_TERMINATOR) { *pos = origin + 1; return NOT_A_CHARACTER; return NOT_A_CODE_POINT; } else { *pos = origin + 3; const int32_t char_1 = character << 16; const int32_t char_1 = codePoint << 16; const int32_t char_2 = char_1 + (dict[origin + 1] << 8); return char_2 + dict[origin + 2]; } } else { *pos = origin + 1; return character; return codePoint; } } Loading Loading @@ -369,15 +369,15 @@ inline int BinaryFormat::getTerminalPosition(const uint8_t *const root, if (0 >= charGroupCount) return NOT_VALID_WORD; const int charGroupPos = pos; const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos); int32_t character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos); int32_t character = BinaryFormat::getCodePointAndForwardPointer(root, &pos); if (character == wChar) { // This is the correct node. Only one character group may start with the same // char within a node, so either we found our match in this node, or there is // no match and we can return NOT_VALID_WORD. So we will check all the characters // in this character group indeed does match. if (FLAG_HAS_MULTIPLE_CHARS & flags) { character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos); while (NOT_A_CHARACTER != character) { character = BinaryFormat::getCodePointAndForwardPointer(root, &pos); while (NOT_A_CODE_POINT != character) { ++wordPos; // If we shoot the length of the word we search for, or if we find a single // character that does not match, as explained above, it means the word is Loading @@ -385,7 +385,7 @@ inline int BinaryFormat::getTerminalPosition(const uint8_t *const root, // match the word on the first character, but not matching the whole word). if (wordPos > length) return NOT_VALID_WORD; if (inWord[wordPos] != character) return NOT_VALID_WORD; character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos); character = BinaryFormat::getCodePointAndForwardPointer(root, &pos); } } // If we come here we know that so far, we do match. Either we are on a terminal Loading Loading @@ -457,19 +457,19 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t *const root, const int a --charGroupCount) { const int startPos = pos; const uint8_t flags = getFlagsAndForwardPointer(root, &pos); const int32_t character = getCharCodeAndForwardPointer(root, &pos); const int32_t character = getCodePointAndForwardPointer(root, &pos); if (address == startPos) { // We found the address. Copy the rest of the word in the buffer and return // the length. outWord[wordPos] = character; if (FLAG_HAS_MULTIPLE_CHARS & flags) { int32_t nextChar = getCharCodeAndForwardPointer(root, &pos); int32_t nextChar = getCodePointAndForwardPointer(root, &pos); // We count chars in order to avoid infinite loops if the file is broken or // if there is some other bug int charCount = maxDepth; while (NOT_A_CHARACTER != nextChar && --charCount > 0) { while (NOT_A_CODE_POINT != nextChar && --charCount > 0) { outWord[++wordPos] = nextChar; nextChar = getCharCodeAndForwardPointer(root, &pos); nextChar = getCodePointAndForwardPointer(root, &pos); } } *outUnigramFrequency = readFrequencyWithoutMovingPointer(root, pos); Loading Loading @@ -523,16 +523,16 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t *const root, const int a const uint8_t lastFlags = getFlagsAndForwardPointer(root, &lastCandidateGroupPos); const int32_t lastChar = getCharCodeAndForwardPointer(root, &lastCandidateGroupPos); getCodePointAndForwardPointer(root, &lastCandidateGroupPos); // We copy all the characters in this group to the buffer outWord[wordPos] = lastChar; if (FLAG_HAS_MULTIPLE_CHARS & lastFlags) { int32_t nextChar = getCharCodeAndForwardPointer(root, &lastCandidateGroupPos); getCodePointAndForwardPointer(root, &lastCandidateGroupPos); int charCount = maxDepth; while (-1 != nextChar && --charCount > 0) { outWord[++wordPos] = nextChar; nextChar = getCharCodeAndForwardPointer(root, &lastCandidateGroupPos); nextChar = getCodePointAndForwardPointer(root, &lastCandidateGroupPos); } } ++wordPos; Loading Loading @@ -582,8 +582,8 @@ inline int BinaryFormat::computeFrequencyForBigram(const int unigramFreq, const // 0 for the bigram frequency represents the middle of the 16th step from the top, // while a value of 15 represents the middle of the top step. // See makedict.BinaryDictInputOutput for details. const float stepSize = (static_cast<float>(MAX_FREQ) - unigramFreq) / (1.5f + MAX_BIGRAM_FREQ); return static_cast<int>(unigramFreq + (bigramFreq + 1) * stepSize); const float stepSize = static_cast<float>(MAX_FREQ - unigramFreq) / (1.5f + MAX_BIGRAM_FREQ); return unigramFreq + static_cast<int>(static_cast<float>(bigramFreq + 1) * stepSize); } // This returns a probability in log space. Loading native/jni/src/bloom_filter.h +8 −6 Original line number Diff line number Diff line Loading @@ -23,14 +23,16 @@ namespace latinime { static inline void setInFilter(uint8_t *filter, const int position) { const unsigned int bucket = position % BIGRAM_FILTER_MODULO; filter[bucket >> 3] |= (1 << (bucket & 0x7)); // TODO: uint32_t position static inline void setInFilter(uint8_t *filter, const int32_t position) { const uint32_t bucket = static_cast<uint32_t>(position % BIGRAM_FILTER_MODULO); filter[bucket >> 3] |= static_cast<uint8_t>(1 << (bucket & 0x7)); } static inline bool isInFilter(const uint8_t *filter, const int position) { const unsigned int bucket = position % BIGRAM_FILTER_MODULO; return filter[bucket >> 3] & (1 << (bucket & 0x7)); // TODO: uint32_t position static inline bool isInFilter(const uint8_t *filter, const int32_t position) { const uint32_t bucket = static_cast<uint32_t>(position % BIGRAM_FILTER_MODULO); return filter[bucket >> 3] & static_cast<uint8_t>(1 << (bucket & 0x7)); } } // namespace latinime #endif // LATINIME_BLOOM_FILTER_H Loading
native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp +5 −5 Original line number Diff line number Diff line Loading @@ -68,9 +68,9 @@ static jlong latinime_BinaryDictionary_open(JNIEnv *env, jobject object, return 0; } int pagesize = getpagesize(); adjust = dictOffset % pagesize; int adjDictOffset = dictOffset - adjust; int adjDictSize = dictSize + adjust; adjust = static_cast<int>(dictOffset) % pagesize; int adjDictOffset = static_cast<int>(dictOffset) - adjust; int adjDictSize = static_cast<int>(dictSize) + adjust; dictBuf = mmap(0, sizeof(char) * adjDictSize, PROT_READ, MAP_PRIVATE, fd, adjDictOffset); if (dictBuf == MAP_FAILED) { AKLOGE("DICT: Can't mmap dictionary. errno=%d", errno); Loading Loading @@ -120,8 +120,8 @@ static jlong latinime_BinaryDictionary_open(JNIEnv *env, jobject object, releaseDictBuf(dictBuf, 0, 0); #endif // USE_MMAP_FOR_DICTIONARY } else { dictionary = new Dictionary(dictBuf, dictSize, fd, adjust, typedLetterMultiplier, fullWordMultiplier, maxWordLength, maxWords, maxPredictions); dictionary = new Dictionary(dictBuf, static_cast<int>(dictSize), fd, adjust, typedLetterMultiplier, fullWordMultiplier, maxWordLength, maxWords, maxPredictions); } PROF_END(66); PROF_CLOSE; Loading
native/jni/src/basechars.cpp +3 −1 Original line number Diff line number Diff line Loading @@ -14,6 +14,8 @@ * limitations under the License. */ #include <stdint.h> #include "char_utils.h" namespace latinime { Loading @@ -24,7 +26,7 @@ namespace latinime { * if c is not a combined character, or the base character if it * is combined. */ const unsigned short BASE_CHARS[BASE_CHARS_SIZE] = { const uint16_t BASE_CHARS[BASE_CHARS_SIZE] = { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, Loading
native/jni/src/bigram_dictionary.cpp +1 −1 Original line number Diff line number Diff line Loading @@ -156,7 +156,7 @@ int BigramDictionary::getBigramListPositionForWord(const int32_t *prevWord, const int flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos); if (0 == (flags & BinaryFormat::FLAG_HAS_BIGRAMS)) return 0; if (0 == (flags & BinaryFormat::FLAG_HAS_MULTIPLE_CHARS)) { BinaryFormat::getCharCodeAndForwardPointer(root, &pos); BinaryFormat::getCodePointAndForwardPointer(root, &pos); } else { pos = BinaryFormat::skipOtherCharacters(root, pos); } Loading
native/jni/src/binary_format.h +21 −21 Original line number Diff line number Diff line Loading @@ -84,7 +84,7 @@ class BinaryFormat { static unsigned int getFlags(const uint8_t *const dict); static int getGroupCountAndForwardPointer(const uint8_t *const dict, int *pos); static uint8_t getFlagsAndForwardPointer(const uint8_t *const dict, int *pos); static int32_t getCharCodeAndForwardPointer(const uint8_t *const dict, int *pos); static int32_t getCodePointAndForwardPointer(const uint8_t *const dict, int *pos); static int readFrequencyWithoutMovingPointer(const uint8_t *const dict, const int pos); static int skipOtherCharacters(const uint8_t *const dict, const int pos); static int skipChildrenPosition(const uint8_t flags, const int pos); Loading Loading @@ -176,22 +176,22 @@ inline uint8_t BinaryFormat::getFlagsAndForwardPointer(const uint8_t *const dict return dict[(*pos)++]; } inline int32_t BinaryFormat::getCharCodeAndForwardPointer(const uint8_t *const dict, int *pos) { inline int32_t BinaryFormat::getCodePointAndForwardPointer(const uint8_t *const dict, int *pos) { const int origin = *pos; const int32_t character = dict[origin]; if (character < MINIMAL_ONE_BYTE_CHARACTER_VALUE) { if (character == CHARACTER_ARRAY_TERMINATOR) { const int32_t codePoint = dict[origin]; if (codePoint < MINIMAL_ONE_BYTE_CHARACTER_VALUE) { if (codePoint == CHARACTER_ARRAY_TERMINATOR) { *pos = origin + 1; return NOT_A_CHARACTER; return NOT_A_CODE_POINT; } else { *pos = origin + 3; const int32_t char_1 = character << 16; const int32_t char_1 = codePoint << 16; const int32_t char_2 = char_1 + (dict[origin + 1] << 8); return char_2 + dict[origin + 2]; } } else { *pos = origin + 1; return character; return codePoint; } } Loading Loading @@ -369,15 +369,15 @@ inline int BinaryFormat::getTerminalPosition(const uint8_t *const root, if (0 >= charGroupCount) return NOT_VALID_WORD; const int charGroupPos = pos; const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos); int32_t character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos); int32_t character = BinaryFormat::getCodePointAndForwardPointer(root, &pos); if (character == wChar) { // This is the correct node. Only one character group may start with the same // char within a node, so either we found our match in this node, or there is // no match and we can return NOT_VALID_WORD. So we will check all the characters // in this character group indeed does match. if (FLAG_HAS_MULTIPLE_CHARS & flags) { character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos); while (NOT_A_CHARACTER != character) { character = BinaryFormat::getCodePointAndForwardPointer(root, &pos); while (NOT_A_CODE_POINT != character) { ++wordPos; // If we shoot the length of the word we search for, or if we find a single // character that does not match, as explained above, it means the word is Loading @@ -385,7 +385,7 @@ inline int BinaryFormat::getTerminalPosition(const uint8_t *const root, // match the word on the first character, but not matching the whole word). if (wordPos > length) return NOT_VALID_WORD; if (inWord[wordPos] != character) return NOT_VALID_WORD; character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos); character = BinaryFormat::getCodePointAndForwardPointer(root, &pos); } } // If we come here we know that so far, we do match. Either we are on a terminal Loading Loading @@ -457,19 +457,19 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t *const root, const int a --charGroupCount) { const int startPos = pos; const uint8_t flags = getFlagsAndForwardPointer(root, &pos); const int32_t character = getCharCodeAndForwardPointer(root, &pos); const int32_t character = getCodePointAndForwardPointer(root, &pos); if (address == startPos) { // We found the address. Copy the rest of the word in the buffer and return // the length. outWord[wordPos] = character; if (FLAG_HAS_MULTIPLE_CHARS & flags) { int32_t nextChar = getCharCodeAndForwardPointer(root, &pos); int32_t nextChar = getCodePointAndForwardPointer(root, &pos); // We count chars in order to avoid infinite loops if the file is broken or // if there is some other bug int charCount = maxDepth; while (NOT_A_CHARACTER != nextChar && --charCount > 0) { while (NOT_A_CODE_POINT != nextChar && --charCount > 0) { outWord[++wordPos] = nextChar; nextChar = getCharCodeAndForwardPointer(root, &pos); nextChar = getCodePointAndForwardPointer(root, &pos); } } *outUnigramFrequency = readFrequencyWithoutMovingPointer(root, pos); Loading Loading @@ -523,16 +523,16 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t *const root, const int a const uint8_t lastFlags = getFlagsAndForwardPointer(root, &lastCandidateGroupPos); const int32_t lastChar = getCharCodeAndForwardPointer(root, &lastCandidateGroupPos); getCodePointAndForwardPointer(root, &lastCandidateGroupPos); // We copy all the characters in this group to the buffer outWord[wordPos] = lastChar; if (FLAG_HAS_MULTIPLE_CHARS & lastFlags) { int32_t nextChar = getCharCodeAndForwardPointer(root, &lastCandidateGroupPos); getCodePointAndForwardPointer(root, &lastCandidateGroupPos); int charCount = maxDepth; while (-1 != nextChar && --charCount > 0) { outWord[++wordPos] = nextChar; nextChar = getCharCodeAndForwardPointer(root, &lastCandidateGroupPos); nextChar = getCodePointAndForwardPointer(root, &lastCandidateGroupPos); } } ++wordPos; Loading Loading @@ -582,8 +582,8 @@ inline int BinaryFormat::computeFrequencyForBigram(const int unigramFreq, const // 0 for the bigram frequency represents the middle of the 16th step from the top, // while a value of 15 represents the middle of the top step. // See makedict.BinaryDictInputOutput for details. const float stepSize = (static_cast<float>(MAX_FREQ) - unigramFreq) / (1.5f + MAX_BIGRAM_FREQ); return static_cast<int>(unigramFreq + (bigramFreq + 1) * stepSize); const float stepSize = static_cast<float>(MAX_FREQ - unigramFreq) / (1.5f + MAX_BIGRAM_FREQ); return unigramFreq + static_cast<int>(static_cast<float>(bigramFreq + 1) * stepSize); } // This returns a probability in log space. Loading
native/jni/src/bloom_filter.h +8 −6 Original line number Diff line number Diff line Loading @@ -23,14 +23,16 @@ namespace latinime { static inline void setInFilter(uint8_t *filter, const int position) { const unsigned int bucket = position % BIGRAM_FILTER_MODULO; filter[bucket >> 3] |= (1 << (bucket & 0x7)); // TODO: uint32_t position static inline void setInFilter(uint8_t *filter, const int32_t position) { const uint32_t bucket = static_cast<uint32_t>(position % BIGRAM_FILTER_MODULO); filter[bucket >> 3] |= static_cast<uint8_t>(1 << (bucket & 0x7)); } static inline bool isInFilter(const uint8_t *filter, const int position) { const unsigned int bucket = position % BIGRAM_FILTER_MODULO; return filter[bucket >> 3] & (1 << (bucket & 0x7)); // TODO: uint32_t position static inline bool isInFilter(const uint8_t *filter, const int32_t position) { const uint32_t bucket = static_cast<uint32_t>(position % BIGRAM_FILTER_MODULO); return filter[bucket >> 3] & static_cast<uint8_t>(1 << (bucket & 0x7)); } } // namespace latinime #endif // LATINIME_BLOOM_FILTER_H