Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit e6926663 authored by Keisuke Kuroyanagi, committed by Android (Google) Code Review
Browse files

Merge "Support n-gram for look-up."

parents f116f910 4926b90e
Loading
Loading
Loading
Loading
+8 −0
Original line number Diff line number Diff line
@@ -167,6 +167,14 @@ int LanguageModelDictContent::createAndGetBitmapEntryIndex(const WordIdArrayView
    if (lastBitmapEntryIndex == TrieMap::INVALID_INDEX) {
        return TrieMap::INVALID_INDEX;
    }
    const int oldestPrevWordId = prevWordIds[prevWordIds.size() - 1];
    const TrieMap::Result result = mTrieMap.get(oldestPrevWordId, lastBitmapEntryIndex);
    if (!result.mIsValid) {
        if (!mTrieMap.put(oldestPrevWordId,
                ProbabilityEntry().encode(mHasHistoricalInfo), lastBitmapEntryIndex)) {
            return TrieMap::INVALID_INDEX;
        }
    }
    return mTrieMap.getNextLevelBitmapEntryIndex(prevWordIds[prevWordIds.size() - 1],
            lastBitmapEntryIndex);
}
+3 −2
Original line number Diff line number Diff line
@@ -36,7 +36,8 @@ class ProbabilityEntry {

    // Dummy entry
    ProbabilityEntry()
            : mFlags(0), mProbability(NOT_A_PROBABILITY), mHistoricalInfo() {}
            : mFlags(Ver4DictConstants::FLAG_NOT_A_VALID_ENTRY), mProbability(NOT_A_PROBABILITY),
              mHistoricalInfo() {}

    // Entry without historical information
    ProbabilityEntry(const int flags, const int probability)
@@ -61,7 +62,7 @@ class ProbabilityEntry {
                      bigramProperty->getCount()) {}

    bool isValid() const {
        return (mProbability != NOT_A_PROBABILITY) || hasHistoricalInfo();
        return (mFlags & Ver4DictConstants::FLAG_NOT_A_VALID_ENTRY) == 0;
    }

    bool hasHistoricalInfo() const {
+1 −0
Original line number Diff line number Diff line
@@ -53,6 +53,7 @@ const int Ver4DictConstants::WORD_LEVEL_FIELD_SIZE = 1;
const int Ver4DictConstants::WORD_COUNT_FIELD_SIZE = 1;

const uint8_t Ver4DictConstants::FLAG_REPRESENTS_BEGINNING_OF_SENTENCE = 0x1;
const uint8_t Ver4DictConstants::FLAG_NOT_A_VALID_ENTRY = 0x2;

const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE = 64;
const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE = 4;
+1 −0
Original line number Diff line number Diff line
@@ -51,6 +51,7 @@ class Ver4DictConstants {
    static const int WORD_COUNT_FIELD_SIZE;
    // Flags in probability entry.
    static const uint8_t FLAG_REPRESENTS_BEGINNING_OF_SENTENCE;
    static const uint8_t FLAG_NOT_A_VALID_ENTRY;

    static const int SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE;
    static const int SHORTCUT_ADDRESS_TABLE_DATA_SIZE;
+14 −15
Original line number Diff line number Diff line
@@ -120,16 +120,15 @@ const WordAttributes Ver4PatriciaTriePolicy::getWordAttributesInContext(
    const int ptNodePos =
            mBuffers->getTerminalPositionLookupTable()->getTerminalPtNodePosition(wordId);
    const PtNodeParams ptNodeParams = mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
    // TODO: Support n-gram.
    const int probability = mBuffers->getLanguageModelDictContent()->getWordProbability(
            prevWordIds.limit(1 /* maxSize */), wordId, mHeaderPolicy);
            prevWordIds, wordId, mHeaderPolicy);
    return WordAttributes(probability, ptNodeParams.isBlacklisted(), ptNodeParams.isNotAWord(),
            probability == 0);
}

int Ver4PatriciaTriePolicy::getProbabilityOfWord(const WordIdArrayView prevWordIds,
        const int wordId) const {
    if (wordId == NOT_A_WORD_ID) {
    if (wordId == NOT_A_WORD_ID || prevWordIds.contains(NOT_A_WORD_ID)) {
        return NOT_A_PROBABILITY;
    }
    const int ptNodePos =
@@ -138,10 +137,8 @@ int Ver4PatriciaTriePolicy::getProbabilityOfWord(const WordIdArrayView prevWordI
    if (ptNodeParams.isDeleted() || ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord()) {
        return NOT_A_PROBABILITY;
    }
    // TODO: Support n-gram.
    const ProbabilityEntry probabilityEntry =
            mBuffers->getLanguageModelDictContent()->getNgramProbabilityEntry(
                    prevWordIds.limit(1 /* maxSize */), wordId);
            mBuffers->getLanguageModelDictContent()->getNgramProbabilityEntry(prevWordIds, wordId);
    if (!probabilityEntry.isValid()) {
        return NOT_A_PROBABILITY;
    }
@@ -164,18 +161,20 @@ void Ver4PatriciaTriePolicy::iterateNgramEntries(const WordIdArrayView prevWordI
    if (prevWordIds.empty()) {
        return;
    }
    // TODO: Support n-gram.
    const auto languageModelDictContent = mBuffers->getLanguageModelDictContent();
    for (size_t i = 1; i <= prevWordIds.size(); ++i) {
        for (const auto entry : languageModelDictContent->getProbabilityEntries(
            prevWordIds.limit(1 /* maxSize */))) {
                prevWordIds.limit(i))) {
            const ProbabilityEntry &probabilityEntry = entry.getProbabilityEntry();
            const int probability = probabilityEntry.hasHistoricalInfo() ?
                    ForgettingCurveUtils::decodeProbability(
                        probabilityEntry.getHistoricalInfo(), mHeaderPolicy) :
                            probabilityEntry.getHistoricalInfo(), mHeaderPolicy)
                            + ForgettingCurveUtils::getProbabilityBiasForNgram(i + 1 /* n */) :
                    probabilityEntry.getProbability();
            listener->onVisitEntry(probability, entry.getWordId());
        }
    }
}

int Ver4PatriciaTriePolicy::getShortcutPositionOfWord(const int wordId) const {
    if (wordId == NOT_A_WORD_ID) {
Loading