Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 9f8c9a01 authored by Keisuke Kuroyanagi
Browse files

Use PrevWordsInfo to add/remove n(bi)-gram in native code.

Bug: 14119293
Bug: 14425059
Change-Id: I4b9a46bfd670b35195418eaee51456d44fb91b6d
parent c18b1c42
Loading
Loading
Loading
Loading
+15 −8
Original line number Diff line number Diff line
@@ -343,7 +343,7 @@ static void latinime_BinaryDictionary_addUnigramWord(JNIEnv *env, jclass clazz,
    // Use 1 for count to indicate the word has been input.
    const UnigramProperty unigramProperty(isNotAWord, isBlacklisted,
            probability, timestamp, 0 /* level */, 1 /* count */, &shortcuts);
    dictionary->addUnigramWord(codePoints, codePointCount, &unigramProperty);
    dictionary->addUnigramEntry(codePoints, codePointCount, &unigramProperty);
}

static void latinime_BinaryDictionary_addBigramWords(JNIEnv *env, jclass clazz, jlong dict,
@@ -363,7 +363,9 @@ static void latinime_BinaryDictionary_addBigramWords(JNIEnv *env, jclass clazz,
    // Use 1 for count to indicate the bigram has been input.
    const BigramProperty bigramProperty(&bigramTargetCodePoints, probability,
            timestamp, 0 /* level */, 1 /* count */);
    dictionary->addBigramWords(word0CodePoints, word0Length, &bigramProperty);
    const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length,
            false /* isBeginningOfSentence */);
    dictionary->addNgramEntry(&prevWordsInfo, &bigramProperty);
}

static void latinime_BinaryDictionary_removeBigramWords(JNIEnv *env, jclass clazz, jlong dict,
@@ -378,8 +380,9 @@ static void latinime_BinaryDictionary_removeBigramWords(JNIEnv *env, jclass claz
    jsize word1Length = env->GetArrayLength(word1);
    int word1CodePoints[word1Length];
    env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints);
    dictionary->removeBigramWords(word0CodePoints, word0Length, word1CodePoints,
            word1Length);
    const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length,
            false /* isBeginningOfSentence */);
    dictionary->removeNgramEntry(&prevWordsInfo, word1CodePoints, word1Length);
}

// Returns how many language model params are processed.
@@ -449,7 +452,7 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
        // Use 1 for count to indicate the word has been input.
        const UnigramProperty unigramProperty(isNotAWord, isBlacklisted,
                unigramProbability, timestamp, 0 /* level */, 1 /* count */, &shortcuts);
        dictionary->addUnigramWord(word1CodePoints, word1Length, &unigramProperty);
        dictionary->addUnigramEntry(word1CodePoints, word1Length, &unigramProperty);
        if (word0) {
            jint bigramProbability = env->GetIntField(languageModelParam, bigramProbabilityFieldId);
            const std::vector<int> bigramTargetCodePoints(
@@ -457,7 +460,9 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
            // Use 1 for count to indicate the bigram has been input.
            const BigramProperty bigramProperty(&bigramTargetCodePoints, bigramProbability,
                    timestamp, 0 /* level */, 1 /* count */);
            dictionary->addBigramWords(word0CodePoints, word0Length, &bigramProperty);
            const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length,
                    false /* isBeginningOfSentence */);
            dictionary->addNgramEntry(&prevWordsInfo, &bigramProperty);
        }
        if (dictionary->needsToRunGC(true /* mindsBlockByGC */)) {
            return i + 1;
@@ -541,7 +546,7 @@ static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, j
                return false;
            }
        }
        if (!dictionaryStructureWithBufferPolicy->addUnigramWord(wordCodePoints, wordLength,
        if (!dictionaryStructureWithBufferPolicy->addUnigramEntry(wordCodePoints, wordLength,
                wordProperty.getUnigramProperty())) {
            LogUtils::logToJava(env, "Cannot add unigram to the new dict.");
            return false;
@@ -561,8 +566,10 @@ static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, j
                return false;
            }
        }
        const PrevWordsInfo prevWordsInfo(wordCodePoints, wordLength,
                false /* isBeginningOfSentence */);
        for (const BigramProperty &bigramProperty : *wordProperty.getBigramProperties()) {
            if (!dictionaryStructureWithBufferPolicy->addBigramWords(wordCodePoints, wordLength,
            if (!dictionaryStructureWithBufferPolicy->addNgramEntry(&prevWordsInfo,
                    &bigramProperty)) {
                LogUtils::logToJava(env, "Cannot add bigram to the new dict.");
                return false;
+2 −2
Original line number Diff line number Diff line
@@ -203,12 +203,12 @@ class DicNode {
        return mDicNodeState.mDicNodeStateInput.getInputIndex(0) < inputSize - 1;
    }

    // Used to get n-gram probability in DicNodeUtils
    // Used to get n-gram probability in DicNodeUtils.
    int getPtNodePos() const {
        return mDicNodeProperties.getPtNodePos();
    }

    // Used to get n-gram probability in DicNodeUtils
    // Used to get n-gram probability in DicNodeUtils. n is 1-indexed.
    int getNthPrevWordTerminalPtNodePos(const int n) const {
        if (n <= 0 || n > MAX_PREV_WORD_COUNT_FOR_N_GRAM) {
            return NOT_A_DICT_POS;
+10 −10
Original line number Diff line number Diff line
@@ -74,28 +74,28 @@ int Dictionary::getProbability(const int *word, int length) const {
    return getDictionaryStructurePolicy()->getUnigramProbabilityOfPtNode(pos);
}

int Dictionary::getBigramProbability(const PrevWordsInfo *const prevWordsInfo, const int *word1,
        int length1) const {
int Dictionary::getBigramProbability(const PrevWordsInfo *const prevWordsInfo, const int *word,
        int length) const {
    TimeKeeper::setCurrentTime();
    return mBigramDictionary.getBigramProbability(prevWordsInfo, word1, length1);
    return mBigramDictionary.getBigramProbability(prevWordsInfo, word, length);
}

void Dictionary::addUnigramWord(const int *const word, const int length,
void Dictionary::addUnigramEntry(const int *const word, const int length,
        const UnigramProperty *const unigramProperty) {
    TimeKeeper::setCurrentTime();
    mDictionaryStructureWithBufferPolicy->addUnigramWord(word, length, unigramProperty);
    mDictionaryStructureWithBufferPolicy->addUnigramEntry(word, length, unigramProperty);
}

void Dictionary::addBigramWords(const int *const word0, const int length0,
void Dictionary::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
        const BigramProperty *const bigramProperty) {
    TimeKeeper::setCurrentTime();
    mDictionaryStructureWithBufferPolicy->addBigramWords(word0, length0, bigramProperty);
    mDictionaryStructureWithBufferPolicy->addNgramEntry(prevWordsInfo, bigramProperty);
}

void Dictionary::removeBigramWords(const int *const word0, const int length0,
        const int *const word1, const int length1) {
void Dictionary::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
        const int *const word, const int length) {
    TimeKeeper::setCurrentTime();
    mDictionaryStructureWithBufferPolicy->removeBigramWords(word0, length0, word1, length1);
    mDictionaryStructureWithBufferPolicy->removeNgramEntry(prevWordsInfo, word, length);
}

void Dictionary::flush(const char *const filePath) {
+5 −5
Original line number Diff line number Diff line
@@ -73,16 +73,16 @@ class Dictionary {
    int getProbability(const int *word, int length) const;

    int getBigramProbability(const PrevWordsInfo *const prevWordsInfo,
            const int *word1, int length1) const;
            const int *word, int length) const;

    void addUnigramWord(const int *const codePoints, const int codePointCount,
    void addUnigramEntry(const int *const codePoints, const int codePointCount,
            const UnigramProperty *const unigramProperty);

    void addBigramWords(const int *const word0, const int length0,
    void addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
            const BigramProperty *const bigramProperty);

    void removeBigramWords(const int *const word0, const int length0, const int *const word1,
            const int length1);
    void removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const int *const word,
            const int length);

    void flush(const char *const filePath);

+1 −0
Original line number Diff line number Diff line
@@ -23,6 +23,7 @@

namespace latinime {

// TODO: Change to NgramProperty.
class BigramProperty {
 public:
    BigramProperty(const std::vector<int> *const targetCodePoints,
Loading