Merge "Cosmetic fixes and a bug fix in... (d34dd5bb) · Commits · e / os / android_packages_inputmethods_LatinIME

native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp

+5 −5

Original line number	Diff line number	Diff line
		@@ -68,9 +68,9 @@ static jlong latinime_BinaryDictionary_open(JNIEnv *env, jobject object,
		return 0;
		}
		int pagesize = getpagesize();
		adjust = dictOffset % pagesize;
		int adjDictOffset = dictOffset - adjust;
		int adjDictSize = dictSize + adjust;
		adjust = static_cast<int>(dictOffset) % pagesize;
		int adjDictOffset = static_cast<int>(dictOffset) - adjust;
		int adjDictSize = static_cast<int>(dictSize) + adjust;
		dictBuf = mmap(0, sizeof(char) * adjDictSize, PROT_READ, MAP_PRIVATE, fd, adjDictOffset);
		if (dictBuf == MAP_FAILED) {
		AKLOGE("DICT: Can't mmap dictionary. errno=%d", errno);
		@@ -120,8 +120,8 @@ static jlong latinime_BinaryDictionary_open(JNIEnv *env, jobject object,
		releaseDictBuf(dictBuf, 0, 0);
		#endif // USE_MMAP_FOR_DICTIONARY
		} else {
		dictionary = new Dictionary(dictBuf, dictSize, fd, adjust, typedLetterMultiplier,
		fullWordMultiplier, maxWordLength, maxWords, maxPredictions);
		dictionary = new Dictionary(dictBuf, static_cast<int>(dictSize), fd, adjust,
		typedLetterMultiplier, fullWordMultiplier, maxWordLength, maxWords, maxPredictions);
		}
		PROF_END(66);
		PROF_CLOSE;

native/jni/src/basechars.cpp

+3 −1

Original line number	Diff line number	Diff line
		@@ -14,6 +14,8 @@
		* limitations under the License.
		*/

		#include <stdint.h>

		#include "char_utils.h"

		namespace latinime {
		@@ -24,7 +26,7 @@ namespace latinime {
		* if c is not a combined character, or the base character if it
		* is combined.
		*/
		const unsigned short BASE_CHARS[BASE_CHARS_SIZE] = {
		const uint16_t BASE_CHARS[BASE_CHARS_SIZE] = {
		0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
		0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
		0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,

native/jni/src/bigram_dictionary.cpp

+1 −1

Original line number	Diff line number	Diff line
		@@ -156,7 +156,7 @@ int BigramDictionary::getBigramListPositionForWord(const int32_t *prevWord,
		const int flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
		if (0 == (flags & BinaryFormat::FLAG_HAS_BIGRAMS)) return 0;
		if (0 == (flags & BinaryFormat::FLAG_HAS_MULTIPLE_CHARS)) {
		BinaryFormat::getCharCodeAndForwardPointer(root, &pos);
		BinaryFormat::getCodePointAndForwardPointer(root, &pos);
		} else {
		pos = BinaryFormat::skipOtherCharacters(root, pos);
		}

native/jni/src/binary_format.h

+21 −21

Original line number	Diff line number	Diff line
		@@ -84,7 +84,7 @@ class BinaryFormat {
		static unsigned int getFlags(const uint8_t *const dict);
		static int getGroupCountAndForwardPointer(const uint8_t *const dict, int *pos);
		static uint8_t getFlagsAndForwardPointer(const uint8_t *const dict, int *pos);
		static int32_t getCharCodeAndForwardPointer(const uint8_t *const dict, int *pos);
		static int32_t getCodePointAndForwardPointer(const uint8_t *const dict, int *pos);
		static int readFrequencyWithoutMovingPointer(const uint8_t *const dict, const int pos);
		static int skipOtherCharacters(const uint8_t *const dict, const int pos);
		static int skipChildrenPosition(const uint8_t flags, const int pos);
		@@ -176,22 +176,22 @@ inline uint8_t BinaryFormat::getFlagsAndForwardPointer(const uint8_t *const dict
		return dict[(*pos)++];
		}

		inline int32_t BinaryFormat::getCharCodeAndForwardPointer(const uint8_t *const dict, int *pos) {
		inline int32_t BinaryFormat::getCodePointAndForwardPointer(const uint8_t *const dict, int *pos) {
		const int origin = *pos;
		const int32_t character = dict[origin];
		if (character < MINIMAL_ONE_BYTE_CHARACTER_VALUE) {
		if (character == CHARACTER_ARRAY_TERMINATOR) {
		const int32_t codePoint = dict[origin];
		if (codePoint < MINIMAL_ONE_BYTE_CHARACTER_VALUE) {
		if (codePoint == CHARACTER_ARRAY_TERMINATOR) {
		*pos = origin + 1;
		return NOT_A_CHARACTER;
		return NOT_A_CODE_POINT;
		} else {
		*pos = origin + 3;
		const int32_t char_1 = character << 16;
		const int32_t char_1 = codePoint << 16;
		const int32_t char_2 = char_1 + (dict[origin + 1] << 8);
		return char_2 + dict[origin + 2];
		}
		} else {
		*pos = origin + 1;
		return character;
		return codePoint;
		}
		}

		@@ -369,15 +369,15 @@ inline int BinaryFormat::getTerminalPosition(const uint8_t *const root,
		if (0 >= charGroupCount) return NOT_VALID_WORD;
		const int charGroupPos = pos;
		const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
		int32_t character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos);
		int32_t character = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
		if (character == wChar) {
		// This is the correct node. Only one character group may start with the same
		// char within a node, so either we found our match in this node, or there is
		// no match and we can return NOT_VALID_WORD. So we will check all the characters
		// in this character group indeed does match.
		if (FLAG_HAS_MULTIPLE_CHARS & flags) {
		character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos);
		while (NOT_A_CHARACTER != character) {
		character = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
		while (NOT_A_CODE_POINT != character) {
		++wordPos;
		// If we shoot the length of the word we search for, or if we find a single
		// character that does not match, as explained above, it means the word is
		@@ -385,7 +385,7 @@ inline int BinaryFormat::getTerminalPosition(const uint8_t *const root,
		// match the word on the first character, but not matching the whole word).
		if (wordPos > length) return NOT_VALID_WORD;
		if (inWord[wordPos] != character) return NOT_VALID_WORD;
		character = BinaryFormat::getCharCodeAndForwardPointer(root, &pos);
		character = BinaryFormat::getCodePointAndForwardPointer(root, &pos);
		}
		}
		// If we come here we know that so far, we do match. Either we are on a terminal
		@@ -457,19 +457,19 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t *const root, const int a
		--charGroupCount) {
		const int startPos = pos;
		const uint8_t flags = getFlagsAndForwardPointer(root, &pos);
		const int32_t character = getCharCodeAndForwardPointer(root, &pos);
		const int32_t character = getCodePointAndForwardPointer(root, &pos);
		if (address == startPos) {
		// We found the address. Copy the rest of the word in the buffer and return
		// the length.
		outWord[wordPos] = character;
		if (FLAG_HAS_MULTIPLE_CHARS & flags) {
		int32_t nextChar = getCharCodeAndForwardPointer(root, &pos);
		int32_t nextChar = getCodePointAndForwardPointer(root, &pos);
		// We count chars in order to avoid infinite loops if the file is broken or
		// if there is some other bug
		int charCount = maxDepth;
		while (NOT_A_CHARACTER != nextChar && --charCount > 0) {
		while (NOT_A_CODE_POINT != nextChar && --charCount > 0) {
		outWord[++wordPos] = nextChar;
		nextChar = getCharCodeAndForwardPointer(root, &pos);
		nextChar = getCodePointAndForwardPointer(root, &pos);
		}
		}
		*outUnigramFrequency = readFrequencyWithoutMovingPointer(root, pos);
		@@ -523,16 +523,16 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t *const root, const int a
		const uint8_t lastFlags =
		getFlagsAndForwardPointer(root, &lastCandidateGroupPos);
		const int32_t lastChar =
		getCharCodeAndForwardPointer(root, &lastCandidateGroupPos);
		getCodePointAndForwardPointer(root, &lastCandidateGroupPos);
		// We copy all the characters in this group to the buffer
		outWord[wordPos] = lastChar;
		if (FLAG_HAS_MULTIPLE_CHARS & lastFlags) {
		int32_t nextChar =
		getCharCodeAndForwardPointer(root, &lastCandidateGroupPos);
		getCodePointAndForwardPointer(root, &lastCandidateGroupPos);
		int charCount = maxDepth;
		while (-1 != nextChar && --charCount > 0) {
		outWord[++wordPos] = nextChar;
		nextChar = getCharCodeAndForwardPointer(root, &lastCandidateGroupPos);
		nextChar = getCodePointAndForwardPointer(root, &lastCandidateGroupPos);
		}
		}
		++wordPos;
		@@ -582,8 +582,8 @@ inline int BinaryFormat::computeFrequencyForBigram(const int unigramFreq, const
		// 0 for the bigram frequency represents the middle of the 16th step from the top,
		// while a value of 15 represents the middle of the top step.
		// See makedict.BinaryDictInputOutput for details.
		const float stepSize = (static_cast<float>(MAX_FREQ) - unigramFreq) / (1.5f + MAX_BIGRAM_FREQ);
		return static_cast<int>(unigramFreq + (bigramFreq + 1) * stepSize);
		const float stepSize = static_cast<float>(MAX_FREQ - unigramFreq) / (1.5f + MAX_BIGRAM_FREQ);
		return unigramFreq + static_cast<int>(static_cast<float>(bigramFreq + 1) * stepSize);
		}

		// This returns a probability in log space.

native/jni/src/bloom_filter.h

+8 −6

Original line number	Diff line number	Diff line
		@@ -23,14 +23,16 @@

		namespace latinime {

		static inline void setInFilter(uint8_t *filter, const int position) {
		const unsigned int bucket = position % BIGRAM_FILTER_MODULO;
		filter[bucket >> 3] |= (1 << (bucket & 0x7));
		// TODO: uint32_t position
		static inline void setInFilter(uint8_t *filter, const int32_t position) {
		const uint32_t bucket = static_cast<uint32_t>(position % BIGRAM_FILTER_MODULO);
		filter[bucket >> 3] |= static_cast<uint8_t>(1 << (bucket & 0x7));
		}

		static inline bool isInFilter(const uint8_t *filter, const int position) {
		const unsigned int bucket = position % BIGRAM_FILTER_MODULO;
		return filter[bucket >> 3] & (1 << (bucket & 0x7));
		// TODO: uint32_t position
		static inline bool isInFilter(const uint8_t *filter, const int32_t position) {
		const uint32_t bucket = static_cast<uint32_t>(position % BIGRAM_FILTER_MODULO);
		return filter[bucket >> 3] & static_cast<uint8_t>(1 << (bucket & 0x7));
		}
		} // namespace latinime
		#endif // LATINIME_BLOOM_FILTER_H