am 22f951c8: Merge "Remove checkFirstCharacter from BigramDictionary." (be81b05f) · Commits · e / os / android_packages_inputmethods_LatinIME

native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp

+1 −1

Original line number	Diff line number	Diff line
		@@ -186,7 +186,7 @@ static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz, j
		scores, spaceIndices, outputTypes);
		} else {
		count = dictionary->getBigrams(prevWordCodePoints, prevWordCodePointsLength,
		inputCodePoints, inputSize, outputCodePoints, scores, outputTypes);
		outputCodePoints, scores, outputTypes);
		}

		// Copy back the output values

native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp

+15 −43

Original line number	Diff line number	Diff line
		@@ -87,21 +87,14 @@ void BigramDictionary::addWordBigram(int *word, int length, int probability, int
		/* Parameters :
		* prevWord: the word before, the one for which we need to look up bigrams.
		* prevWordLength: its length.
		* inputCodePoints: what user typed, in the same format as for UnigramDictionary::getSuggestions.
		* inputSize: the size of the codes array.
		* bigramCodePoints: an array for output, at the same format as outwords for getSuggestions.
		* bigramProbability: an array to output frequencies.
		* outBigramCodePoints: an array for output, in the same format as outwords for getSuggestions.
		* outBigramProbability: an array to output frequencies.
		* outputTypes: an array to output types.
		* This method returns the number of bigrams this word has, for backward compatibility.
		* Note: this is not the number of bigrams output in the array, which is the number of
		* bigrams this word has WHOSE first letter also matches the letter the user typed.
		* TODO: this may not be a sensible thing to do. It makes sense when the bigrams are
		* used to match the first letter of the second word, but once the user has typed more
		* and the bigrams are used to boost unigram result scores, it makes little sense to
		* reduce their scope to the ones that match the first letter.
		*/
		int BigramDictionary::getPredictions(const int prevWord, int prevWordLength, int inputCodePoints,
		int inputSize, int bigramCodePoints, int bigramProbability, int *outputTypes) const {
		int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLength,
		int const outBigramCodePoints, int const outBigramProbability,
		int *const outputTypes) const {
		// TODO: remove unused arguments, and refrain from storing stuff in members of this class
		// TODO: have "in" arguments before "out" ones, and make out args explicit in the name

		@@ -126,22 +119,17 @@ int BigramDictionary::getPredictions(const int *prevWord, int prevWordLength, in
		getCodePointsAndProbabilityAndReturnCodePointCount(
		mBinaryDictionaryInfo, bigramsIt.getBigramPos(), MAX_WORD_LENGTH,
		bigramBuffer, &unigramProbability);

		// inputSize == 0 means we are trying to find bigram predictions.
		if (inputSize < 1 \|\| checkFirstCharacter(bigramBuffer, inputCodePoints)) {
		const int bigramProbabilityTemp = bigramsIt.getProbability();
		// Due to space constraints, the probability for bigrams is approximate - the lower the
		// unigram probability, the worse the precision. The theoretical maximum error in the
		// resulting probability is 8 - although in practice it's never bigger than 3 or 4
		// in very bad cases. This means that sometimes, we'll see some bigrams swapped
		// here, but it can't get too bad.
		const int probability = ProbabilityUtils::computeProbabilityForBigram(
		unigramProbability, bigramProbabilityTemp);
		addWordBigram(bigramBuffer, length, probability, bigramProbability, bigramCodePoints,
		unigramProbability, bigramsIt.getProbability());
		addWordBigram(bigramBuffer, length, probability, outBigramProbability, outBigramCodePoints,
		outputTypes);
		++bigramCount;
		}
		}
		return min(bigramCount, MAX_RESULTS);
		}

		@@ -157,22 +145,6 @@ int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const in
		mBinaryDictionaryInfo, pos);
		}

		bool BigramDictionary::checkFirstCharacter(int word, int inputCodePoints) const {
		// Checks whether this word starts with same character or neighboring characters of
		// what user typed.

		int maxAlt = MAX_ALTERNATIVES;
		const int firstBaseLowerCodePoint = CharUtils::toBaseLowerCase(*word);
		while (maxAlt > 0) {
		if (CharUtils::toBaseLowerCase(*inputCodePoints) == firstBaseLowerCodePoint) {
		return true;
		}
		inputCodePoints++;
		maxAlt--;
		}
		return false;
		}

		bool BigramDictionary::isValidBigram(const int word0, int length0, const int word1,
		int length1) const {
		int pos = getBigramListPositionForWord(word0, length0, false /* forceLowerCaseSearch */);

native/jni/src/suggest/core/dictionary/bigram_dictionary.h

+2 −5

Original line number	Diff line number	Diff line
		@@ -27,8 +27,8 @@ class BigramDictionary {
		public:
		BigramDictionary(const BinaryDictionaryInfo *const binaryDictionaryInfo);

		int getPredictions(const int word, int length, int inputCodePoints, int inputSize,
		int outWords, int frequencies, int *outputTypes) const;
		int getPredictions(const int word, int length, int outBigramCodePoints,
		int outBigramProbability, int outputTypes) const;
		bool isValidBigram(const int word1, int length1, const int word2, int length2) const;
		~BigramDictionary();

		@@ -37,13 +37,10 @@ class BigramDictionary {

		void addWordBigram(int word, int length, int probability, int bigramProbability,
		int bigramCodePoints, int outputTypes) const;
		bool checkFirstCharacter(int word, int inputCodePoints) const;
		int getBigramListPositionForWord(const int *prevWord, const int prevWordLength,
		const bool forceLowerCaseSearch) const;

		const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
		// TODO: Re-implement proximity correction for bigram correction
		static const int MAX_ALTERNATIVES = 1;
		};
		} // namespace latinime
		#endif // LATINIME_BIGRAM_DICTIONARY_H

native/jni/src/suggest/core/dictionary/dictionary.cpp

+3 −4

Original line number	Diff line number	Diff line
		@@ -76,11 +76,10 @@ int Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession
		}
		}

		int Dictionary::getBigrams(const int word, int length, int inputCodePoints, int inputSize,
		int outWords, int frequencies, int *outputTypes) const {
		int Dictionary::getBigrams(const int word, int length, int outWords, int *frequencies,
		int *outputTypes) const {
		if (length <= 0) return 0;
		return mBigramDictionary->getPredictions(word, length, inputCodePoints, inputSize, outWords,
		frequencies, outputTypes);
		return mBigramDictionary->getPredictions(word, length, outWords, frequencies, outputTypes);
		}

		int Dictionary::getProbability(const int *word, int length) const {

native/jni/src/suggest/core/dictionary/dictionary.h

+2 −2

Original line number	Diff line number	Diff line
		@@ -62,8 +62,8 @@ class Dictionary {
		const SuggestOptions const suggestOptions, int outWords, int *frequencies,
		int spaceIndices, int outputTypes) const;

		int getBigrams(const int word, int length, int inputCodePoints, int inputSize, int *outWords,
		int frequencies, int outputTypes) const;
		int getBigrams(const int word, int length, int outWords, int *frequencies,
		int *outputTypes) const;

		int getProbability(const int *word, int length) const;