Refactor parameters by naming convention (e0e67373) · Commits · e / os / android_packages_inputmethods_LatinIME

java/src/com/android/inputmethod/latin/BinaryDictionary.java

+2 −2

Original line number	Diff line number	Diff line
		@@ -91,7 +91,7 @@ public final class BinaryDictionary extends Dictionary {

		private static native long openNative(String sourceDir, long dictOffset, long dictSize);
		private static native void closeNative(long dict);
		private static native int getFrequencyNative(long dict, int[] word);
		private static native int getProbabilityNative(long dict, int[] word);
		private static native boolean isValidBigramNative(long dict, int[] word1, int[] word2);
		private static native int getSuggestionsNative(long dict, long proximityInfo,
		long traverseSession, int[] xCoordinates, int[] yCoordinates, int[] times,
		@@ -186,7 +186,7 @@ public final class BinaryDictionary extends Dictionary {
		public int getFrequency(final String word) {
		if (word == null) return -1;
		int[] codePoints = StringUtils.toCodePointArray(word);
		return getFrequencyNative(mNativeDict, codePoints);
		return getProbabilityNative(mNativeDict, codePoints);
		}

		// TODO: Add a batch process version (isValidBigramMultiple?) to avoid excessive numbers of jni

native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp

+4 −4

Original line number	Diff line number	Diff line
		@@ -203,14 +203,14 @@ static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz, j
		return count;
		}

		static jint latinime_BinaryDictionary_getFrequency(JNIEnv *env, jclass clazz, jlong dict,
		static jint latinime_BinaryDictionary_getProbability(JNIEnv *env, jclass clazz, jlong dict,
		jintArray wordArray) {
		Dictionary dictionary = reinterpret_cast<Dictionary >(dict);
		if (!dictionary) return 0;
		const jsize codePointLength = env->GetArrayLength(wordArray);
		int codePoints[codePointLength];
		env->GetIntArrayRegion(wordArray, 0, codePointLength, codePoints);
		return dictionary->getFrequency(codePoints, codePointLength);
		return dictionary->getProbability(codePoints, codePointLength);
		}

		static jboolean latinime_BinaryDictionary_isValidBigram(JNIEnv *env, jclass clazz, jlong dict,
		@@ -285,8 +285,8 @@ static JNINativeMethod sMethods[] = {
		{"closeNative", "(J)V", reinterpret_cast<void *>(latinime_BinaryDictionary_close)},
		{"getSuggestionsNative", "(JJJ[I[I[I[I[IIIZ[IZ[I[I[I[I)I",
		reinterpret_cast<void *>(latinime_BinaryDictionary_getSuggestions)},
		{"getFrequencyNative", "(J[I)I",
		reinterpret_cast<void *>(latinime_BinaryDictionary_getFrequency)},
		{"getProbabilityNative", "(J[I)I",
		reinterpret_cast<void *>(latinime_BinaryDictionary_getProbability)},
		{"isValidBigramNative", "(J[I[I)Z",
		reinterpret_cast<void *>(latinime_BinaryDictionary_isValidBigram)},
		{"calcNormalizedScoreNative", "([I[II)F",

native/jni/src/bigram_dictionary.cpp

+23 −22

Original line number	Diff line number	Diff line
		@@ -36,21 +36,21 @@ BigramDictionary::BigramDictionary(const uint8_t *const streamStart) : DICT_ROOT
		BigramDictionary::~BigramDictionary() {
		}

		void BigramDictionary::addWordBigram(int word, int length, int frequency, int bigramFreq,
		void BigramDictionary::addWordBigram(int word, int length, int probability, int bigramProbability,
		int bigramCodePoints, int outputTypes) const {
		word[length] = 0;
		if (DEBUG_DICT) {
		#ifdef FLAG_DBG
		char s[length + 1];
		for (int i = 0; i <= length; i++) s[i] = static_cast<char>(word[i]);
		AKLOGI("Bigram: Found word = %s, freq = %d :", s, frequency);
		AKLOGI("Bigram: Found word = %s, freq = %d :", s, probability);
		#endif
		}

		// Find the right insertion point
		int insertAt = 0;
		while (insertAt < MAX_RESULTS) {
		if (frequency > bigramFreq[insertAt] \|\| (bigramFreq[insertAt] == frequency
		if (probability > bigramProbability[insertAt] \|\| (bigramProbability[insertAt] == probability
		&& length < getCodePointCount(MAX_WORD_LENGTH,
		bigramCodePoints + insertAt * MAX_WORD_LENGTH))) {
		break;
		@@ -63,10 +63,10 @@ void BigramDictionary::addWordBigram(int word, int length, int frequency, int
		if (insertAt >= MAX_RESULTS) {
		return;
		}
		memmove(bigramFreq + (insertAt + 1),
		bigramFreq + insertAt,
		(MAX_RESULTS - insertAt - 1) * sizeof(bigramFreq[0]));
		bigramFreq[insertAt] = frequency;
		memmove(bigramProbability + (insertAt + 1),
		bigramProbability + insertAt,
		(MAX_RESULTS - insertAt - 1) * sizeof(bigramProbability[0]));
		bigramProbability[insertAt] = probability;
		outputTypes[insertAt] = Dictionary::KIND_PREDICTION;
		memmove(bigramCodePoints + (insertAt + 1) * MAX_WORD_LENGTH,
		bigramCodePoints + insertAt * MAX_WORD_LENGTH,
		@@ -87,7 +87,7 @@ void BigramDictionary::addWordBigram(int word, int length, int frequency, int
		* inputCodePoints: what user typed, in the same format as for UnigramDictionary::getSuggestions.
		* inputSize: the size of the codes array.
		* bigramCodePoints: an array for output, at the same format as outwords for getSuggestions.
		* bigramFreq: an array to output frequencies.
		* bigramProbability: an array to output probabilities.
		* outputTypes: an array to output types.
		* This method returns the number of bigrams this word has, for backward compatibility.
		* Note: this is not the number of bigrams output in the array, which is the number of
		@@ -98,7 +98,7 @@ void BigramDictionary::addWordBigram(int word, int length, int frequency, int
		* reduce their scope to the ones that match the first letter.
		*/
		int BigramDictionary::getBigrams(const int prevWord, int prevWordLength, int inputCodePoints,
		int inputSize, int bigramCodePoints, int bigramFreq, int *outputTypes) const {
		int inputSize, int bigramCodePoints, int bigramProbability, int *outputTypes) const {
		// TODO: remove unused arguments, and refrain from storing stuff in members of this class
		// TODO: have "in" arguments before "out" ones, and make out args explicit in the name

		@@ -118,23 +118,24 @@ int BigramDictionary::getBigrams(const int prevWord, int prevWordLength, int i
		do {
		bigramFlags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
		int bigramBuffer[MAX_WORD_LENGTH];
		int unigramFreq = 0;
		int unigramProbability = 0;
		const int bigramPos = BinaryFormat::getAttributeAddressAndForwardPointer(root, bigramFlags,
		&pos);
		const int length = BinaryFormat::getWordAtAddress(root, bigramPos, MAX_WORD_LENGTH,
		bigramBuffer, &unigramFreq);
		bigramBuffer, &unigramProbability);

		// inputSize == 0 means we are trying to find bigram predictions.
		if (inputSize < 1 \|\| checkFirstCharacter(bigramBuffer, inputCodePoints)) {
		const int bigramFreqTemp = BinaryFormat::MASK_ATTRIBUTE_FREQUENCY & bigramFlags;
		// Due to space constraints, the frequency for bigrams is approximate - the lower the
		// unigram frequency, the worse the precision. The theoritical maximum error in
		// resulting frequency is 8 - although in the practice it's never bigger than 3 or 4
		const int bigramProbabilityTemp =
		BinaryFormat::MASK_ATTRIBUTE_PROBABILITY & bigramFlags;
		// Due to space constraints, the probability for bigrams is approximate - the lower the
		// unigram probability, the worse the precision. The theoretical maximum error in
		// resulting probability is 8 - although in practice it's never bigger than 3 or 4
		// in very bad cases. This means that sometimes, we'll see some bigrams swapped
		// here, but it can't get too bad.
		const int frequency =
		BinaryFormat::computeFrequencyForBigram(unigramFreq, bigramFreqTemp);
		addWordBigram(bigramBuffer, length, frequency, bigramFreq, bigramCodePoints,
		const int probability = BinaryFormat::computeProbabilityForBigram(
		unigramProbability, bigramProbabilityTemp);
		addWordBigram(bigramBuffer, length, probability, bigramProbability, bigramCodePoints,
		outputTypes);
		++bigramCount;
		}
		@@ -159,13 +160,13 @@ int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const in
		} else {
		pos = BinaryFormat::skipOtherCharacters(root, pos);
		}
		pos = BinaryFormat::skipFrequency(flags, pos);
		pos = BinaryFormat::skipProbability(flags, pos);
		pos = BinaryFormat::skipChildrenPosition(flags, pos);
		pos = BinaryFormat::skipShortcuts(root, flags, pos);
		return pos;
		}

		void BigramDictionary::fillBigramAddressToFrequencyMapAndFilter(const int *prevWord,
		void BigramDictionary::fillBigramAddressToProbabilityMapAndFilter(const int *prevWord,
		const int prevWordLength, std::map<int, int> map, uint8_t filter) const {
		memset(filter, 0, BIGRAM_FILTER_BYTE_SIZE);
		const uint8_t *const root = DICT_ROOT;
		@@ -181,10 +182,10 @@ void BigramDictionary::fillBigramAddressToFrequencyMapAndFilter(const int *prevW
		uint8_t bigramFlags;
		do {
		bigramFlags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
		const int frequency = BinaryFormat::MASK_ATTRIBUTE_FREQUENCY & bigramFlags;
		const int probability = BinaryFormat::MASK_ATTRIBUTE_PROBABILITY & bigramFlags;
		const int bigramPos = BinaryFormat::getAttributeAddressAndForwardPointer(root, bigramFlags,
		&pos);
		(*map)[bigramPos] = frequency;
		(*map)[bigramPos] = probability;
		setInFilter(filter, bigramPos);
		} while (0 != (BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags));
		}

native/jni/src/bigram_dictionary.h

+3 −3

Original line number	Diff line number	Diff line
		@@ -29,14 +29,14 @@ class BigramDictionary {
		BigramDictionary(const uint8_t *const streamStart);
		int getBigrams(const int word, int length, int inputCodePoints, int inputSize, int *outWords,
		int frequencies, int outputTypes) const;
		void fillBigramAddressToFrequencyMapAndFilter(const int *prevWord, const int prevWordLength,
		void fillBigramAddressToProbabilityMapAndFilter(const int *prevWord, const int prevWordLength,
		std::map<int, int> map, uint8_t filter) const;
		bool isValidBigram(const int word1, int length1, const int word2, int length2) const;
		~BigramDictionary();
		private:
		DISALLOW_IMPLICIT_CONSTRUCTORS(BigramDictionary);
		void addWordBigram(int word, int length, int frequency, int bigramFreq, int *bigramCodePoints,
		int *outputTypes) const;
		void addWordBigram(int word, int length, int probability, int bigramProbability,
		int bigramCodePoints, int outputTypes) const;
		bool checkFirstCharacter(int word, int inputCodePoints) const;
		int getBigramListPositionForWord(const int *prevWord, const int prevWordLength,
		const bool forceLowerCaseSearch) const;

native/jni/src/binary_format.h

+44 −40

Original line number	Diff line number	Diff line
		@@ -52,10 +52,10 @@ class BinaryFormat {
		// Flag for sign of offset. If this flag is set, the offset value must be negated.
		static const int FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40;

		// Mask for attribute frequency, stored on 4 bits inside the flags byte.
		static const int MASK_ATTRIBUTE_FREQUENCY = 0x0F;
		// The numeric value of the shortcut frequency that means 'whitelist'.
		static const int WHITELIST_SHORTCUT_FREQUENCY = 15;
		// Mask for attribute probability, stored on 4 bits inside the flags byte.
		static const int MASK_ATTRIBUTE_PROBABILITY = 0x0F;
		// The numeric value of the shortcut probability that means 'whitelist'.
		static const int WHITELIST_SHORTCUT_PROBABILITY = 15;

		// Mask and flags for attribute address type selection.
		static const int MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30;
		@@ -72,10 +72,10 @@ class BinaryFormat {
		static int getGroupCountAndForwardPointer(const uint8_t const dict, int pos);
		static uint8_t getFlagsAndForwardPointer(const uint8_t const dict, int pos);
		static int getCodePointAndForwardPointer(const uint8_t const dict, int pos);
		static int readFrequencyWithoutMovingPointer(const uint8_t *const dict, const int pos);
		static int readProbabilityWithoutMovingPointer(const uint8_t *const dict, const int pos);
		static int skipOtherCharacters(const uint8_t *const dict, const int pos);
		static int skipChildrenPosition(const uint8_t flags, const int pos);
		static int skipFrequency(const uint8_t flags, const int pos);
		static int skipProbability(const uint8_t flags, const int pos);
		static int skipShortcuts(const uint8_t *const dict, const uint8_t flags, const int pos);
		static int skipChildrenPosAndAttributes(const uint8_t *const dict, const uint8_t flags,
		const int pos);
		@@ -83,14 +83,15 @@ class BinaryFormat {
		static bool hasChildrenInFlags(const uint8_t flags);
		static int getAttributeAddressAndForwardPointer(const uint8_t *const dict, const uint8_t flags,
		int *pos);
		static int getAttributeFrequencyFromFlags(const int flags);
		static int getAttributeProbabilityFromFlags(const int flags);
		static int getTerminalPosition(const uint8_t const root, const int const inWord,
		const int length, const bool forceLowerCaseSearch);
		static int getWordAtAddress(const uint8_t *const root, const int address, const int maxDepth,
		int outWord, int outUnigramFrequency);
		static int computeFrequencyForBigram(const int unigramFreq, const int bigramFreq);
		int outWord, int outUnigramProbability);
		static int computeProbabilityForBigram(
		const int unigramProbability, const int bigramProbability);
		static int getProbability(const int position, const std::map<int, int> *bigramMap,
		const uint8_t *bigramFilter, const int unigramFreq);
		const uint8_t *bigramFilter, const int unigramProbability);

		// Flags for special processing
		// Those must match the flags in makedict (BinaryDictInputOutput#*_PROCESSING_FLAG) or
		@@ -264,7 +265,7 @@ AK_FORCE_INLINE int BinaryFormat::getCodePointAndForwardPointer(const uint8_t *c
		}
		}

		inline int BinaryFormat::readFrequencyWithoutMovingPointer(const uint8_t *const dict,
		inline int BinaryFormat::readProbabilityWithoutMovingPointer(const uint8_t *const dict,
		const int pos) {
		return dict[pos];
		}
		@@ -320,7 +321,7 @@ inline int BinaryFormat::skipChildrenPosition(const uint8_t flags, const int pos
		return pos + childrenAddressSize(flags);
		}

		inline int BinaryFormat::skipFrequency(const uint8_t flags, const int pos) {
		inline int BinaryFormat::skipProbability(const uint8_t flags, const int pos) {
		return FLAG_IS_TERMINAL & flags ? pos + 1 : pos;
		}

		@@ -415,8 +416,8 @@ AK_FORCE_INLINE int BinaryFormat::getAttributeAddressAndForwardPointer(const uin
		}
		}

		inline int BinaryFormat::getAttributeFrequencyFromFlags(const int flags) {
		return flags & MASK_ATTRIBUTE_FREQUENCY;
		inline int BinaryFormat::getAttributeProbabilityFromFlags(const int flags) {
		return flags & MASK_ATTRIBUTE_PROBABILITY;
		}

		// This function gets the byte position of the last chargroup of the exact matching word in the
		@@ -466,7 +467,7 @@ AK_FORCE_INLINE int BinaryFormat::getTerminalPosition(const uint8_t *const root,
		if (wordPos == length) {
		return charGroupPos;
		}
		pos = BinaryFormat::skipFrequency(FLAG_IS_TERMINAL, pos);
		pos = BinaryFormat::skipProbability(FLAG_IS_TERMINAL, pos);
		}
		if (FLAG_GROUP_ADDRESS_TYPE_NOADDRESS == (MASK_GROUP_ADDRESS_TYPE & flags)) {
		return NOT_VALID_WORD;
		@@ -481,7 +482,7 @@ AK_FORCE_INLINE int BinaryFormat::getTerminalPosition(const uint8_t *const root,
		if (FLAG_HAS_MULTIPLE_CHARS & flags) {
		pos = BinaryFormat::skipOtherCharacters(root, pos);
		}
		pos = BinaryFormat::skipFrequency(flags, pos);
		pos = BinaryFormat::skipProbability(flags, pos);
		pos = BinaryFormat::skipChildrenPosAndAttributes(root, flags, pos);
		}
		--charGroupCount;
		@@ -504,11 +505,11 @@ AK_FORCE_INLINE int BinaryFormat::getTerminalPosition(const uint8_t *const root,
		* address: the byte position of the last chargroup of the word we are searching for (this is
		* what is stored as the "bigram address" in each bigram)
		* outWord: an array to write the found word, with MAX_WORD_LENGTH size.
		* outUnigramFrequency: a pointer to an int to write the frequency into.
		* outUnigramProbability: a pointer to an int to write the probability into.
		* Return value : the length of the word, or 0 if the word was not found.
		*/
		AK_FORCE_INLINE int BinaryFormat::getWordAtAddress(const uint8_t *const root, const int address,
		const int maxDepth, int outWord, int outUnigramFrequency) {
		const int maxDepth, int outWord, int outUnigramProbability) {
		int pos = 0;
		int wordPos = 0;

		@@ -541,15 +542,15 @@ AK_FORCE_INLINE int BinaryFormat::getWordAtAddress(const uint8_t *const root, co
		nextChar = getCodePointAndForwardPointer(root, &pos);
		}
		}
		*outUnigramFrequency = readFrequencyWithoutMovingPointer(root, pos);
		*outUnigramProbability = readProbabilityWithoutMovingPointer(root, pos);
		return ++wordPos;
		}
		// We need to skip past this char group, so skip any remaining chars after the
		// first and possibly the frequency.
		// first and possibly the probability.
		if (FLAG_HAS_MULTIPLE_CHARS & flags) {
		pos = skipOtherCharacters(root, pos);
		}
		pos = skipFrequency(flags, pos);
		pos = skipProbability(flags, pos);

		// The fact that this group has children is very important. Since we already know
		// that this group does not match, if it has no children we know it is irrelevant
		@@ -604,9 +605,9 @@ AK_FORCE_INLINE int BinaryFormat::getWordAtAddress(const uint8_t *const root, co
		}
		}
		++wordPos;
		// Now we only need to branch to the children address. Skip the frequency if
		// Now we only need to branch to the children address. Skip the probability if
		// it's there, read pos, and break to resume the search at pos.
		lastCandidateGroupPos = skipFrequency(lastFlags, lastCandidateGroupPos);
		lastCandidateGroupPos = skipProbability(lastFlags, lastCandidateGroupPos);
		pos = readChildrenPosition(root, lastFlags, lastCandidateGroupPos);
		break;
		} else {
		@@ -635,36 +636,39 @@ AK_FORCE_INLINE int BinaryFormat::getWordAtAddress(const uint8_t *const root, co
		return 0;
		}

		static inline int backoff(const int unigramFreq) {
		return unigramFreq;
		static inline int backoff(const int unigramProbability) {
		return unigramProbability;
		// For some reason, applying the backoff weight gives bad results in tests. To apply the
		// backoff weight, we divide the probability by 2, which in our storing format means
		// decreasing the score by 8.
		// TODO: figure out what's wrong with this.
		// return unigramFreq > 8 ? unigramFreq - 8 : (0 == unigramFreq ? 0 : 8);
		// return unigramProbability > 8 ? unigramProbability - 8 : (0 == unigramProbability ? 0 : 8);
		}

		inline int BinaryFormat::computeFrequencyForBigram(const int unigramFreq, const int bigramFreq) {
		// We divide the range [unigramFreq..255] in 16.5 steps - in other words, we want the
		// unigram frequency to be the median value of the 17th step from the top. A value of
		// 0 for the bigram frequency represents the middle of the 16th step from the top,
		inline int BinaryFormat::computeProbabilityForBigram(
		const int unigramProbability, const int bigramProbability) {
		// We divide the range [unigramProbability..255] in 16.5 steps - in other words, we want the
		// unigram probability to be the median value of the 17th step from the top. A value of
		// 0 for the bigram probability represents the middle of the 16th step from the top,
		// while a value of 15 represents the middle of the top step.
		// See makedict.BinaryDictInputOutput for details.
		const float stepSize = static_cast<float>(MAX_FREQ - unigramFreq) / (1.5f + MAX_BIGRAM_FREQ);
		return unigramFreq + static_cast<int>(static_cast<float>(bigramFreq + 1) * stepSize);
		const float stepSize = static_cast<float>(MAX_PROBABILITY - unigramProbability)
		/ (1.5f + MAX_BIGRAM_ENCODED_PROBABILITY);
		return unigramProbability
		+ static_cast<int>(static_cast<float>(bigramProbability + 1) * stepSize);
		}

		// This returns a probability in log space.
		inline int BinaryFormat::getProbability(const int position, const std::map<int, int> *bigramMap,
		const uint8_t *bigramFilter, const int unigramFreq) {
		if (!bigramMap \|\| !bigramFilter) return backoff(unigramFreq);
		if (!isInFilter(bigramFilter, position)) return backoff(unigramFreq);
		const std::map<int, int>::const_iterator bigramFreqIt = bigramMap->find(position);
		if (bigramFreqIt != bigramMap->end()) {
		const int bigramFreq = bigramFreqIt->second;
		return computeFrequencyForBigram(unigramFreq, bigramFreq);
		}
		return backoff(unigramFreq);
		const uint8_t *bigramFilter, const int unigramProbability) {
		if (!bigramMap \|\| !bigramFilter) return backoff(unigramProbability);
		if (!isInFilter(bigramFilter, position)) return backoff(unigramProbability);
		const std::map<int, int>::const_iterator bigramProbabilityIt = bigramMap->find(position);
		if (bigramProbabilityIt != bigramMap->end()) {
		const int bigramProbability = bigramProbabilityIt->second;
		return computeProbabilityForBigram(unigramProbability, bigramProbability);
		}
		return backoff(unigramProbability);
		}
		} // namespace latinime
		#endif // LATINIME_BINARY_FORMAT_H