Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 7d911d6f authored by Keisuke Kuroyanagi
Browse files

Move word flags to language model dict content.

Bug: 14425059
Change-Id: I64712e5c83d0bc241e6f0f16117ab47b5d75bd4b
parent ddfaeff5
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -71,6 +71,11 @@ class UnigramProperty {
        return mIsBlacklisted;
    }

    // Reports whether this unigram is treated as possibly offensive.
    // TODO: Have dedicated flag.
    bool isPossiblyOffensive() const {
        // Until a dedicated flag exists, a probability of exactly zero is the marker.
        const bool hasZeroProbability = (0 == mProbability);
        return hasZeroProbability;
    }

    bool hasShortcuts() const {
        return !mShortcuts.empty();
    }
+12 −5
Original line number Diff line number Diff line
@@ -38,7 +38,7 @@ bool LanguageModelDictContent::runGC(
            0 /* nextLevelBitmapEntryIndex */, outNgramCount);
}

int LanguageModelDictContent::getWordProbability(const WordIdArrayView prevWordIds,
const WordAttributes LanguageModelDictContent::getWordAttributes(const WordIdArrayView prevWordIds,
        const int wordId, const HeaderPolicy *const headerPolicy) const {
    int bitmapEntryIndices[MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1];
    bitmapEntryIndices[0] = mTrieMap.getRootBitmapEntryIndex();
@@ -60,17 +60,24 @@ int LanguageModelDictContent::getWordProbability(const WordIdArrayView prevWordI
        }
        const ProbabilityEntry probabilityEntry =
                ProbabilityEntry::decode(result.mValue, mHasHistoricalInfo);
        int probability = NOT_A_PROBABILITY;
        if (mHasHistoricalInfo) {
            const int probability = ForgettingCurveUtils::decodeProbability(
            const int rawProbability = ForgettingCurveUtils::decodeProbability(
                    probabilityEntry.getHistoricalInfo(), headerPolicy)
                            + ForgettingCurveUtils::getProbabilityBiasForNgram(i + 1 /* n */);
            return std::min(probability, MAX_PROBABILITY);
            probability = std::min(rawProbability, MAX_PROBABILITY);
        } else {
            return probabilityEntry.getProbability();
            probability = probabilityEntry.getProbability();
        }
        // TODO: Some flags in unigramProbabilityEntry should be overwritten by flags in
        // probabilityEntry.
        const ProbabilityEntry unigramProbabilityEntry = getProbabilityEntry(wordId);
        return WordAttributes(probability, unigramProbabilityEntry.isNotAWord(),
                unigramProbabilityEntry.isBlacklisted(),
                unigramProbabilityEntry.isPossiblyOffensive());
    }
    // Cannot find the word.
    return NOT_A_PROBABILITY;
    return WordAttributes();
}

ProbabilityEntry LanguageModelDictContent::getNgramProbabilityEntry(
+2 −1
Original line number Diff line number Diff line
@@ -21,6 +21,7 @@
#include <vector>

#include "defines.h"
#include "suggest/core/dictionary/word_attributes.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
@@ -128,7 +129,7 @@ class LanguageModelDictContent {
            const LanguageModelDictContent *const originalContent,
            int *const outNgramCount);

    int getWordProbability(const WordIdArrayView prevWordIds, const int wordId,
    const WordAttributes getWordAttributes(const WordIdArrayView prevWordIds, const int wordId,
            const HeaderPolicy *const headerPolicy) const;

    ProbabilityEntry getProbabilityEntry(const int wordId) const {
+27 −3
Original line number Diff line number Diff line
@@ -49,7 +49,9 @@ class ProbabilityEntry {

    // Create from unigram property.
    // The flags byte is built by createFlags() from the property's attributes
    // (beginning-of-sentence, not-a-word, blacklisted, possibly-offensive); the probability
    // and the historical info (timestamp, level, count) are copied from the property.
    // NOTE: unigramProperty is dereferenced without a null check, so it must not be null.
    ProbabilityEntry(const UnigramProperty *const unigramProperty)
            : mFlags(createFlags(unigramProperty->representsBeginningOfSentence())),
            : mFlags(createFlags(unigramProperty->representsBeginningOfSentence(),
                    unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(),
                    unigramProperty->isPossiblyOffensive())),
              mProbability(unigramProperty->getProbability()),
              mHistoricalInfo(unigramProperty->getTimestamp(), unigramProperty->getLevel(),
                      unigramProperty->getCount()) {}
@@ -85,6 +87,18 @@ class ProbabilityEntry {
        return (mFlags & Ver4DictConstants::FLAG_REPRESENTS_BEGINNING_OF_SENTENCE) != 0;
    }

    // True when the FLAG_NOT_A_WORD bit is set in this entry's flags.
    bool isNotAWord() const {
        const bool flagSet = 0 != (mFlags & Ver4DictConstants::FLAG_NOT_A_WORD);
        return flagSet;
    }

    // True when the FLAG_BLACKLISTED bit is set in this entry's flags.
    bool isBlacklisted() const {
        const bool flagSet = 0 != (mFlags & Ver4DictConstants::FLAG_BLACKLISTED);
        return flagSet;
    }

    // True when the FLAG_POSSIBLY_OFFENSIVE bit is set in this entry's flags.
    bool isPossiblyOffensive() const {
        const bool flagSet = 0 != (mFlags & Ver4DictConstants::FLAG_POSSIBLY_OFFENSIVE);
        return flagSet;
    }

    uint64_t encode(const bool hasHistoricalInfo) const {
        uint64_t encodedEntry = static_cast<uint64_t>(mFlags);
        if (hasHistoricalInfo) {
@@ -142,10 +156,20 @@ class ProbabilityEntry {
                (encodedEntry >> (pos * CHAR_BIT)) & ((1ull << (size * CHAR_BIT)) - 1));
    }

    // Builds the flags byte for a probability entry from the given boolean attributes.
    // For each attribute that is true, the matching Ver4DictConstants::FLAG_* bit is set;
    // when every argument is false the result is 0.
    static uint8_t createFlags(const bool representsBeginningOfSentence) {
    static uint8_t createFlags(const bool representsBeginningOfSentence,
            const bool isNotAWord, const bool isBlacklisted, const bool isPossiblyOffensive) {
        // Start with no bits set and add one bit per true attribute.
        uint8_t flags = 0;
        if (representsBeginningOfSentence) {
            flags ^= Ver4DictConstants::FLAG_REPRESENTS_BEGINNING_OF_SENTENCE;
            flags |= Ver4DictConstants::FLAG_REPRESENTS_BEGINNING_OF_SENTENCE;
        }
        if (isNotAWord) {
            flags |= Ver4DictConstants::FLAG_NOT_A_WORD;
        }
        if (isBlacklisted) {
            flags |= Ver4DictConstants::FLAG_BLACKLISTED;
        }
        if (isPossiblyOffensive) {
            flags |= Ver4DictConstants::FLAG_POSSIBLY_OFFENSIVE;
        }
        return flags;
    }
+3 −0
Original line number Diff line number Diff line
@@ -54,6 +54,9 @@ const int Ver4DictConstants::WORD_COUNT_FIELD_SIZE = 1;

// Single-bit masks; each flag occupies its own bit so several can be combined in one byte.
const uint8_t Ver4DictConstants::FLAG_REPRESENTS_BEGINNING_OF_SENTENCE = 1 << 0;
const uint8_t Ver4DictConstants::FLAG_NOT_A_VALID_ENTRY = 1 << 1;
const uint8_t Ver4DictConstants::FLAG_NOT_A_WORD = 1 << 2;
const uint8_t Ver4DictConstants::FLAG_BLACKLISTED = 1 << 3;
const uint8_t Ver4DictConstants::FLAG_POSSIBLY_OFFENSIVE = 1 << 4;

const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE = 64;
const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE = 4;
Loading