Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 2a2aac25 authored by Keisuke Kuroynagi's avatar Keisuke Kuroynagi
Browse files

Remove checkFirstCharacter from BigramDictionary.

Bug: 10028452
Change-Id: I27b147e83b312d73e975a0b2bc8074b33906e56e
parent ab2d2731
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -186,7 +186,7 @@ static int latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz, j
                scores, spaceIndices, outputTypes);
    } else {
        count = dictionary->getBigrams(prevWordCodePoints, prevWordCodePointsLength,
                inputCodePoints, inputSize, outputCodePoints, scores, outputTypes);
                outputCodePoints, scores, outputTypes);
    }

    // Copy back the output values
+15 −43
Original line number Diff line number Diff line
@@ -88,21 +88,14 @@ void BigramDictionary::addWordBigram(int *word, int length, int probability, int
/* Parameters :
 * prevWord: the word before, the one for which we need to look up bigrams.
 * prevWordLength: its length.
 * inputCodePoints: what user typed, in the same format as for UnigramDictionary::getSuggestions.
 * inputSize: the size of the codes array.
 * bigramCodePoints: an array for output, in the same format as outwords for getSuggestions.
 * bigramProbability: an array to output frequencies.
 * outBigramCodePoints: an array for output, in the same format as outwords for getSuggestions.
 * outBigramProbability: an array to output frequencies.
 * outputTypes: an array to output types.
 * This method returns the number of bigrams this word has, for backward compatibility.
 * Note: this is not the number of bigrams output in the array, which is the number of
 * bigrams this word has WHOSE first letter also matches the letter the user typed.
 * TODO: this may not be a sensible thing to do. It makes sense when the bigrams are
 * used to match the first letter of the second word, but once the user has typed more
 * and the bigrams are used to boost unigram result scores, it makes little sense to
 * reduce their scope to the ones that match the first letter.
 */
int BigramDictionary::getPredictions(const int *prevWord, int prevWordLength, int *inputCodePoints,
        int inputSize, int *bigramCodePoints, int *bigramProbability, int *outputTypes) const {
int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLength,
        int *const outBigramCodePoints, int *const outBigramProbability,
        int *const outputTypes) const {
    // TODO: remove unused arguments, and refrain from storing stuff in members of this class
    // TODO: have "in" arguments before "out" ones, and make out args explicit in the name

@@ -127,22 +120,17 @@ int BigramDictionary::getPredictions(const int *prevWord, int prevWordLength, in
                getCodePointsAndProbabilityAndReturnCodePointCount(
                        mBinaryDictionaryInfo, bigramsIt.getBigramPos(), MAX_WORD_LENGTH,
                        bigramBuffer, &unigramProbability);

        // inputSize == 0 means we are trying to find bigram predictions.
        if (inputSize < 1 || checkFirstCharacter(bigramBuffer, inputCodePoints)) {
            const int bigramProbabilityTemp = bigramsIt.getProbability();
        // Due to space constraints, the probability for bigrams is approximate - the lower the
        // unigram probability, the worse the precision. The theoretical maximum error in
        // resulting probability is 8 - although in practice it's never bigger than 3 or 4
        // in very bad cases. This means that sometimes, we'll see some bigrams swapped
        // here, but it can't get too bad.
        const int probability = ProbabilityUtils::computeProbabilityForBigram(
                    unigramProbability, bigramProbabilityTemp);
            addWordBigram(bigramBuffer, length, probability, bigramProbability, bigramCodePoints,
                unigramProbability, bigramsIt.getProbability());
        addWordBigram(bigramBuffer, length, probability, outBigramProbability, outBigramCodePoints,
                outputTypes);
        ++bigramCount;
    }
    }
    return min(bigramCount, MAX_RESULTS);
}

@@ -158,22 +146,6 @@ int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const in
            mBinaryDictionaryInfo, pos);
}

bool BigramDictionary::checkFirstCharacter(int *word, int *inputCodePoints) const {
    // Tells whether the word begins with the character the user typed (or one of its
    // neighboring characters). The first code point of the word and each of the first
    // MAX_ALTERNATIVES input code points are compared after being passed through
    // CharUtils::toBaseLowerCase.
    const int wordHeadCodePoint = CharUtils::toBaseLowerCase(*word);
    for (int i = 0; i < MAX_ALTERNATIVES; ++i) {
        const int typedCodePoint = CharUtils::toBaseLowerCase(inputCodePoints[i]);
        if (typedCodePoint == wordHeadCodePoint) {
            return true;
        }
    }
    // None of the inspected input code points matched the head of the word.
    return false;
}

bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *word1,
        int length1) const {
    int pos = getBigramListPositionForWord(word0, length0, false /* forceLowerCaseSearch */);
+2 −5
Original line number Diff line number Diff line
@@ -27,8 +27,8 @@ class BigramDictionary {
 public:
    BigramDictionary(const BinaryDictionaryInfo *const binaryDictionaryInfo);

    int getPredictions(const int *word, int length, int *inputCodePoints, int inputSize,
            int *outWords, int *frequencies, int *outputTypes) const;
    int getPredictions(const int *word, int length, int *outBigramCodePoints,
            int *outBigramProbability, int *outputTypes) const;
    bool isValidBigram(const int *word1, int length1, const int *word2, int length2) const;
    ~BigramDictionary();

@@ -37,13 +37,10 @@ class BigramDictionary {

    void addWordBigram(int *word, int length, int probability, int *bigramProbability,
            int *bigramCodePoints, int *outputTypes) const;
    bool checkFirstCharacter(int *word, int *inputCodePoints) const;
    int getBigramListPositionForWord(const int *prevWord, const int prevWordLength,
            const bool forceLowerCaseSearch) const;

    const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
    // TODO: Re-implement proximity correction for bigram correction
    static const int MAX_ALTERNATIVES = 1;
};
} // namespace latinime
#endif // LATINIME_BIGRAM_DICTIONARY_H
+3 −4
Original line number Diff line number Diff line
@@ -77,11 +77,10 @@ int Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession
    }
}

int Dictionary::getBigrams(const int *word, int length, int *inputCodePoints, int inputSize,
        int *outWords, int *frequencies, int *outputTypes) const {
int Dictionary::getBigrams(const int *word, int length, int *outWords, int *frequencies,
        int *outputTypes) const {
    if (length <= 0) return 0;
    return mBigramDictionary->getPredictions(word, length, inputCodePoints, inputSize, outWords,
            frequencies, outputTypes);
    return mBigramDictionary->getPredictions(word, length, outWords, frequencies, outputTypes);
}

int Dictionary::getProbability(const int *word, int length) const {
+2 −2
Original line number Diff line number Diff line
@@ -62,8 +62,8 @@ class Dictionary {
            const SuggestOptions *const suggestOptions, int *outWords, int *frequencies,
            int *spaceIndices, int *outputTypes) const;

    int getBigrams(const int *word, int length, int *inputCodePoints, int inputSize, int *outWords,
            int *frequencies, int *outputTypes) const;
    int getBigrams(const int *word, int length, int *outWords, int *frequencies,
            int *outputTypes) const;

    int getProbability(const int *word, int length) const;