Loading native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp +1 −1 Original line number Diff line number Diff line Loading @@ -186,7 +186,7 @@ static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz, j scores, spaceIndices, outputTypes); } else { count = dictionary->getBigrams(prevWordCodePoints, prevWordCodePointsLength, inputCodePoints, inputSize, outputCodePoints, scores, outputTypes); outputCodePoints, scores, outputTypes); } // Copy back the output values Loading native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp +15 −43 Original line number Diff line number Diff line Loading @@ -87,21 +87,14 @@ void BigramDictionary::addWordBigram(int *word, int length, int probability, int /* Parameters : * prevWord: the word before, the one for which we need to look up bigrams. * prevWordLength: its length. * inputCodePoints: what user typed, in the same format as for UnigramDictionary::getSuggestions. * inputSize: the size of the codes array. * bigramCodePoints: an array for output, at the same format as outwords for getSuggestions. * bigramProbability: an array to output frequencies. * outBigramCodePoints: an array for output, at the same format as outwords for getSuggestions. * outBigramProbability: an array to output frequencies. * outputTypes: an array to output types. * This method returns the number of bigrams this word has, for backward compatibility. * Note: this is not the number of bigrams output in the array, which is the number of * bigrams this word has WHOSE first letter also matches the letter the user typed. * TODO: this may not be a sensible thing to do. It makes sense when the bigrams are * used to match the first letter of the second word, but once the user has typed more * and the bigrams are used to boost unigram result scores, it makes little sense to * reduce their scope to the ones that match the first letter. */ int BigramDictionary::getPredictions(const int *prevWord, int prevWordLength, int *inputCodePoints, int inputSize, int *bigramCodePoints, int *bigramProbability, int *outputTypes) const { int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLength, int *const outBigramCodePoints, int *const outBigramProbability, int *const outputTypes) const { // TODO: remove unused arguments, and refrain from storing stuff in members of this class // TODO: have "in" arguments before "out" ones, and make out args explicit in the name Loading @@ -126,22 +119,17 @@ int BigramDictionary::getPredictions(const int *prevWord, int prevWordLength, in getCodePointsAndProbabilityAndReturnCodePointCount( mBinaryDictionaryInfo, bigramsIt.getBigramPos(), MAX_WORD_LENGTH, bigramBuffer, &unigramProbability); // inputSize == 0 means we are trying to find bigram predictions. if (inputSize < 1 || checkFirstCharacter(bigramBuffer, inputCodePoints)) { const int bigramProbabilityTemp = bigramsIt.getProbability(); // Due to space constraints, the probability for bigrams is approximate - the lower the // unigram probability, the worse the precision. The theoritical maximum error in // resulting probability is 8 - although in the practice it's never bigger than 3 or 4 // in very bad cases. This means that sometimes, we'll see some bigrams interverted // here, but it can't get too bad. const int probability = ProbabilityUtils::computeProbabilityForBigram( unigramProbability, bigramProbabilityTemp); addWordBigram(bigramBuffer, length, probability, bigramProbability, bigramCodePoints, unigramProbability, bigramsIt.getProbability()); addWordBigram(bigramBuffer, length, probability, outBigramProbability, outBigramCodePoints, outputTypes); ++bigramCount; } } return min(bigramCount, MAX_RESULTS); } Loading @@ -157,22 +145,6 @@ int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const in mBinaryDictionaryInfo, pos); } bool BigramDictionary::checkFirstCharacter(int *word, int *inputCodePoints) const { // Checks whether this word starts with same character or neighboring characters of // what user typed. int maxAlt = MAX_ALTERNATIVES; const int firstBaseLowerCodePoint = CharUtils::toBaseLowerCase(*word); while (maxAlt > 0) { if (CharUtils::toBaseLowerCase(*inputCodePoints) == firstBaseLowerCodePoint) { return true; } inputCodePoints++; maxAlt--; } return false; } bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *word1, int length1) const { int pos = getBigramListPositionForWord(word0, length0, false /* forceLowerCaseSearch */); Loading native/jni/src/suggest/core/dictionary/bigram_dictionary.h +2 −5 Original line number Diff line number Diff line Loading @@ -27,8 +27,8 @@ class BigramDictionary { public: BigramDictionary(const BinaryDictionaryInfo *const binaryDictionaryInfo); int getPredictions(const int *word, int length, int *inputCodePoints, int inputSize, int *outWords, int *frequencies, int *outputTypes) const; int getPredictions(const int *word, int length, int *outBigramCodePoints, int *outBigramProbability, int *outputTypes) const; bool isValidBigram(const int *word1, int length1, const int *word2, int length2) const; ~BigramDictionary(); Loading @@ -37,13 +37,10 @@ class BigramDictionary { void addWordBigram(int *word, int length, int probability, int *bigramProbability, int *bigramCodePoints, int *outputTypes) const; bool checkFirstCharacter(int *word, int *inputCodePoints) const; int getBigramListPositionForWord(const int *prevWord, const int prevWordLength, const bool forceLowerCaseSearch) const; const BinaryDictionaryInfo *const mBinaryDictionaryInfo; // TODO: Re-implement proximity correction for bigram correction static const int MAX_ALTERNATIVES = 1; }; } // namespace latinime #endif // LATINIME_BIGRAM_DICTIONARY_H native/jni/src/suggest/core/dictionary/dictionary.cpp +3 −4 Original line number Diff line number Diff line Loading @@ -76,11 +76,10 @@ int Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession } } int Dictionary::getBigrams(const int *word, int length, int *inputCodePoints, int inputSize, int *outWords, int *frequencies, int *outputTypes) const { int Dictionary::getBigrams(const int *word, int length, int *outWords, int *frequencies, int *outputTypes) const { if (length <= 0) return 0; return mBigramDictionary->getPredictions(word, length, inputCodePoints, inputSize, outWords, frequencies, outputTypes); return mBigramDictionary->getPredictions(word, length, outWords, frequencies, outputTypes); } int Dictionary::getProbability(const int *word, int length) const { Loading native/jni/src/suggest/core/dictionary/dictionary.h +2 −2 Original line number Diff line number Diff line Loading @@ -62,8 +62,8 @@ class Dictionary { const SuggestOptions *const suggestOptions, int *outWords, int *frequencies, int *spaceIndices, int *outputTypes) const; int getBigrams(const int *word, int length, int *inputCodePoints, int inputSize, int *outWords, int *frequencies, int *outputTypes) const; int getBigrams(const int *word, int length, int *outWords, int *frequencies, int *outputTypes) const; int getProbability(const int *word, int length) const; Loading Loading
native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp +1 −1 Original line number Diff line number Diff line Loading @@ -186,7 +186,7 @@ static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz, j scores, spaceIndices, outputTypes); } else { count = dictionary->getBigrams(prevWordCodePoints, prevWordCodePointsLength, inputCodePoints, inputSize, outputCodePoints, scores, outputTypes); outputCodePoints, scores, outputTypes); } // Copy back the output values Loading
native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp +15 −43 Original line number Diff line number Diff line Loading @@ -87,21 +87,14 @@ void BigramDictionary::addWordBigram(int *word, int length, int probability, int /* Parameters : * prevWord: the word before, the one for which we need to look up bigrams. * prevWordLength: its length. * inputCodePoints: what user typed, in the same format as for UnigramDictionary::getSuggestions. * inputSize: the size of the codes array. * bigramCodePoints: an array for output, at the same format as outwords for getSuggestions. * bigramProbability: an array to output frequencies. * outBigramCodePoints: an array for output, at the same format as outwords for getSuggestions. * outBigramProbability: an array to output frequencies. * outputTypes: an array to output types. * This method returns the number of bigrams this word has, for backward compatibility. * Note: this is not the number of bigrams output in the array, which is the number of * bigrams this word has WHOSE first letter also matches the letter the user typed. * TODO: this may not be a sensible thing to do. It makes sense when the bigrams are * used to match the first letter of the second word, but once the user has typed more * and the bigrams are used to boost unigram result scores, it makes little sense to * reduce their scope to the ones that match the first letter. */ int BigramDictionary::getPredictions(const int *prevWord, int prevWordLength, int *inputCodePoints, int inputSize, int *bigramCodePoints, int *bigramProbability, int *outputTypes) const { int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLength, int *const outBigramCodePoints, int *const outBigramProbability, int *const outputTypes) const { // TODO: remove unused arguments, and refrain from storing stuff in members of this class // TODO: have "in" arguments before "out" ones, and make out args explicit in the name Loading @@ -126,22 +119,17 @@ int BigramDictionary::getPredictions(const int *prevWord, int prevWordLength, in getCodePointsAndProbabilityAndReturnCodePointCount( mBinaryDictionaryInfo, bigramsIt.getBigramPos(), MAX_WORD_LENGTH, bigramBuffer, &unigramProbability); // inputSize == 0 means we are trying to find bigram predictions. if (inputSize < 1 || checkFirstCharacter(bigramBuffer, inputCodePoints)) { const int bigramProbabilityTemp = bigramsIt.getProbability(); // Due to space constraints, the probability for bigrams is approximate - the lower the // unigram probability, the worse the precision. The theoritical maximum error in // resulting probability is 8 - although in the practice it's never bigger than 3 or 4 // in very bad cases. This means that sometimes, we'll see some bigrams interverted // here, but it can't get too bad. const int probability = ProbabilityUtils::computeProbabilityForBigram( unigramProbability, bigramProbabilityTemp); addWordBigram(bigramBuffer, length, probability, bigramProbability, bigramCodePoints, unigramProbability, bigramsIt.getProbability()); addWordBigram(bigramBuffer, length, probability, outBigramProbability, outBigramCodePoints, outputTypes); ++bigramCount; } } return min(bigramCount, MAX_RESULTS); } Loading @@ -157,22 +145,6 @@ int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const in mBinaryDictionaryInfo, pos); } bool BigramDictionary::checkFirstCharacter(int *word, int *inputCodePoints) const { // Checks whether this word starts with same character or neighboring characters of // what user typed. int maxAlt = MAX_ALTERNATIVES; const int firstBaseLowerCodePoint = CharUtils::toBaseLowerCase(*word); while (maxAlt > 0) { if (CharUtils::toBaseLowerCase(*inputCodePoints) == firstBaseLowerCodePoint) { return true; } inputCodePoints++; maxAlt--; } return false; } bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *word1, int length1) const { int pos = getBigramListPositionForWord(word0, length0, false /* forceLowerCaseSearch */); Loading
native/jni/src/suggest/core/dictionary/bigram_dictionary.h +2 −5 Original line number Diff line number Diff line Loading @@ -27,8 +27,8 @@ class BigramDictionary { public: BigramDictionary(const BinaryDictionaryInfo *const binaryDictionaryInfo); int getPredictions(const int *word, int length, int *inputCodePoints, int inputSize, int *outWords, int *frequencies, int *outputTypes) const; int getPredictions(const int *word, int length, int *outBigramCodePoints, int *outBigramProbability, int *outputTypes) const; bool isValidBigram(const int *word1, int length1, const int *word2, int length2) const; ~BigramDictionary(); Loading @@ -37,13 +37,10 @@ class BigramDictionary { void addWordBigram(int *word, int length, int probability, int *bigramProbability, int *bigramCodePoints, int *outputTypes) const; bool checkFirstCharacter(int *word, int *inputCodePoints) const; int getBigramListPositionForWord(const int *prevWord, const int prevWordLength, const bool forceLowerCaseSearch) const; const BinaryDictionaryInfo *const mBinaryDictionaryInfo; // TODO: Re-implement proximity correction for bigram correction static const int MAX_ALTERNATIVES = 1; }; } // namespace latinime #endif // LATINIME_BIGRAM_DICTIONARY_H
native/jni/src/suggest/core/dictionary/dictionary.cpp +3 −4 Original line number Diff line number Diff line Loading @@ -76,11 +76,10 @@ int Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession } } int Dictionary::getBigrams(const int *word, int length, int *inputCodePoints, int inputSize, int *outWords, int *frequencies, int *outputTypes) const { int Dictionary::getBigrams(const int *word, int length, int *outWords, int *frequencies, int *outputTypes) const { if (length <= 0) return 0; return mBigramDictionary->getPredictions(word, length, inputCodePoints, inputSize, outWords, frequencies, outputTypes); return mBigramDictionary->getPredictions(word, length, outWords, frequencies, outputTypes); } int Dictionary::getProbability(const int *word, int length) const { Loading
native/jni/src/suggest/core/dictionary/dictionary.h +2 −2 Original line number Diff line number Diff line Loading @@ -62,8 +62,8 @@ class Dictionary { const SuggestOptions *const suggestOptions, int *outWords, int *frequencies, int *spaceIndices, int *outputTypes) const; int getBigrams(const int *word, int length, int *inputCodePoints, int inputSize, int *outWords, int *frequencies, int *outputTypes) const; int getBigrams(const int *word, int length, int *outWords, int *frequencies, int *outputTypes) const; int getProbability(const int *word, int length) const; Loading