Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit c912957f authored by Keisuke Kuroyanagi, committed by Android (Google) Code Review
Browse files

Merge "Move word flags to language model dict content."

parents a2dbc448 7d911d6f
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -71,6 +71,11 @@ class UnigramProperty {
        return mIsBlacklisted;
    }

    // Reports whether this word should be treated as possibly offensive.
    bool isPossiblyOffensive() const {
        // TODO: Have dedicated flag.
        // Until then, a probability of exactly zero stands in for the flag.
        const bool hasZeroProbability = (mProbability == 0);
        return hasZeroProbability;
    }

    bool hasShortcuts() const {
        return !mShortcuts.empty();
    }
+12 −5
Original line number Diff line number Diff line
@@ -38,7 +38,7 @@ bool LanguageModelDictContent::runGC(
            0 /* nextLevelBitmapEntryIndex */, outNgramCount);
}

// NOTE(review): this span is rendered from a diff, so the previous and the updated form of
// several lines appear back to back (for example the two signatures directly below), and the
// hunk marker in the middle hides part of the body.
// From the visible lines: on a hit, the function returns a WordAttributes built from the
// computed probability plus the not-a-word / blacklisted / possibly-offensive flags read from
// the unigram entry of wordId; when the word cannot be found it returns a default-constructed
// WordAttributes.
int LanguageModelDictContent::getWordProbability(const WordIdArrayView prevWordIds,
const WordAttributes LanguageModelDictContent::getWordAttributes(const WordIdArrayView prevWordIds,
        const int wordId, const HeaderPolicy *const headerPolicy) const {
    int bitmapEntryIndices[MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1];
    bitmapEntryIndices[0] = mTrieMap.getRootBitmapEntryIndex();
@@ -60,17 +60,24 @@ int LanguageModelDictContent::getWordProbability(const WordIdArrayView prevWordI
        }
        const ProbabilityEntry probabilityEntry =
                ProbabilityEntry::decode(result.mValue, mHasHistoricalInfo);
        // Starts as NOT_A_PROBABILITY; the if/else below assigns the actual value.
        int probability = NOT_A_PROBABILITY;
        if (mHasHistoricalInfo) {
            const int probability = ForgettingCurveUtils::decodeProbability(
            const int rawProbability = ForgettingCurveUtils::decodeProbability(
                    probabilityEntry.getHistoricalInfo(), headerPolicy)
                            + ForgettingCurveUtils::getProbabilityBiasForNgram(i + 1 /* n */);
            // The decoded-plus-bias value is capped at MAX_PROBABILITY.
            return std::min(probability, MAX_PROBABILITY);
            probability = std::min(rawProbability, MAX_PROBABILITY);
        } else {
            return probabilityEntry.getProbability();
            probability = probabilityEntry.getProbability();
        }
        // TODO: Some flags in unigramProbabilityEntry should be overwritten by flags in
        // probabilityEntry.
        // The flags are taken from the unigram entry of wordId, not from probabilityEntry.
        const ProbabilityEntry unigramProbabilityEntry = getProbabilityEntry(wordId);
        return WordAttributes(probability, unigramProbabilityEntry.isNotAWord(),
                unigramProbabilityEntry.isBlacklisted(),
                unigramProbabilityEntry.isPossiblyOffensive());
    }
    // Cannot find the word.
    return NOT_A_PROBABILITY;
    return WordAttributes();
}

ProbabilityEntry LanguageModelDictContent::getNgramProbabilityEntry(
+2 −1
Original line number Diff line number Diff line
@@ -21,6 +21,7 @@
#include <vector>

#include "defines.h"
#include "suggest/core/dictionary/word_attributes.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h"
#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
@@ -128,7 +129,7 @@ class LanguageModelDictContent {
            const LanguageModelDictContent *const originalContent,
            int *const outNgramCount);

    // Declares the lookup that yields the WordAttributes of wordId for the given prevWordIds
    // and headerPolicy.
    // NOTE(review): rendered from a diff; the int-returning getWordProbability line appears to
    // be the previous form of this declaration.
    int getWordProbability(const WordIdArrayView prevWordIds, const int wordId,
    const WordAttributes getWordAttributes(const WordIdArrayView prevWordIds, const int wordId,
            const HeaderPolicy *const headerPolicy) const;

    ProbabilityEntry getProbabilityEntry(const int wordId) const {
+27 −3
Original line number Diff line number Diff line
@@ -49,7 +49,9 @@ class ProbabilityEntry {

    // Create from unigram property.
    // Copies the probability and the historical info (timestamp, level, count) out of
    // unigramProperty and packs its boolean attributes into mFlags through createFlags().
    // NOTE(review): rendered from a diff; the single-argument createFlags() initializer appears
    // to be the previous form of the four-argument initializer that follows it.
    ProbabilityEntry(const UnigramProperty *const unigramProperty)
            : mFlags(createFlags(unigramProperty->representsBeginningOfSentence())),
            : mFlags(createFlags(unigramProperty->representsBeginningOfSentence(),
                    unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(),
                    unigramProperty->isPossiblyOffensive())),
              mProbability(unigramProperty->getProbability()),
              mHistoricalInfo(unigramProperty->getTimestamp(), unigramProperty->getLevel(),
                      unigramProperty->getCount()) {}
@@ -85,6 +87,18 @@ class ProbabilityEntry {
        return (mFlags & Ver4DictConstants::FLAG_REPRESENTS_BEGINNING_OF_SENTENCE) != 0;
    }

    // True when the FLAG_NOT_A_WORD bit is set in mFlags.
    bool isNotAWord() const {
        const auto maskedBits = mFlags & Ver4DictConstants::FLAG_NOT_A_WORD;
        return maskedBits != 0;
    }

    // True when the FLAG_BLACKLISTED bit is set in mFlags.
    bool isBlacklisted() const {
        const auto maskedBits = mFlags & Ver4DictConstants::FLAG_BLACKLISTED;
        return maskedBits != 0;
    }

    // True when the FLAG_POSSIBLY_OFFENSIVE bit is set in mFlags.
    bool isPossiblyOffensive() const {
        const auto maskedBits = mFlags & Ver4DictConstants::FLAG_POSSIBLY_OFFENSIVE;
        return maskedBits != 0;
    }

    uint64_t encode(const bool hasHistoricalInfo) const {
        uint64_t encodedEntry = static_cast<uint64_t>(mFlags);
        if (hasHistoricalInfo) {
@@ -142,10 +156,20 @@ class ProbabilityEntry {
                (encodedEntry >> (pos * CHAR_BIT)) & ((1ull << (size * CHAR_BIT)) - 1));
    }

    // Builds the flag byte for an entry: starting from 0, each argument that is true turns on
    // its matching Ver4DictConstants::FLAG_* bit, and the combined value is returned.
    // NOTE(review): rendered from a diff; the one-parameter signature and the `^=` line appear
    // to be the previous forms of the lines that follow them.
    static uint8_t createFlags(const bool representsBeginningOfSentence) {
    static uint8_t createFlags(const bool representsBeginningOfSentence,
            const bool isNotAWord, const bool isBlacklisted, const bool isPossiblyOffensive) {
        uint8_t flags = 0;
        if (representsBeginningOfSentence) {
            flags ^= Ver4DictConstants::FLAG_REPRESENTS_BEGINNING_OF_SENTENCE;
            flags |= Ver4DictConstants::FLAG_REPRESENTS_BEGINNING_OF_SENTENCE;
        }
        if (isNotAWord) {
            flags |= Ver4DictConstants::FLAG_NOT_A_WORD;
        }
        if (isBlacklisted) {
            flags |= Ver4DictConstants::FLAG_BLACKLISTED;
        }
        if (isPossiblyOffensive) {
            flags |= Ver4DictConstants::FLAG_POSSIBLY_OFFENSIVE;
        }
        return flags;
    }
+3 −0
Original line number Diff line number Diff line
@@ -54,6 +54,9 @@ const int Ver4DictConstants::WORD_COUNT_FIELD_SIZE = 1;

// Single-bit masks; every value below occupies a different bit (0x1, 0x2, 0x4, 0x8, 0x10), so
// they can be OR-ed together into one uint8_t and tested independently.
const uint8_t Ver4DictConstants::FLAG_REPRESENTS_BEGINNING_OF_SENTENCE = 0x1;
const uint8_t Ver4DictConstants::FLAG_NOT_A_VALID_ENTRY = 0x2;
const uint8_t Ver4DictConstants::FLAG_NOT_A_WORD = 0x4;
const uint8_t Ver4DictConstants::FLAG_BLACKLISTED = 0x8;
const uint8_t Ver4DictConstants::FLAG_POSSIBLY_OFFENSIVE = 0x10;

const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE = 64;
const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE = 4;
Loading