Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit cb4f5441 authored by Keisuke Kuroyanagi
Browse files

Quit reading unigram probability in Ver4PatriciaTrieNodeReader.

Bug: 14425059
Change-Id: I4fc7b0e236151a2c64e7131772264024c6597633
parent 2842e50c
Loading
Loading
Loading
Loading
+8 −3
Original line number Diff line number Diff line
@@ -63,9 +63,14 @@ const WordAttributes LanguageModelDictContent::getWordAttributes(const WordIdArr
        int probability = NOT_A_PROBABILITY;
        if (mHasHistoricalInfo) {
            const int rawProbability = ForgettingCurveUtils::decodeProbability(
                    probabilityEntry.getHistoricalInfo(), headerPolicy)
                            + ForgettingCurveUtils::getProbabilityBiasForNgram(i + 1 /* n */);
            probability = std::min(rawProbability, MAX_PROBABILITY);
                    probabilityEntry.getHistoricalInfo(), headerPolicy);
            if (rawProbability == NOT_A_PROBABILITY) {
                // The entry should not be treated as a valid entry.
                continue;
            }
            probability = std::min(rawProbability
                    + ForgettingCurveUtils::getProbabilityBiasForNgram(i + 1 /* n */),
                            MAX_PROBABILITY);
        } else {
            probability = probabilityEntry.getProbability();
        }
+5 −14
Original line number Diff line number Diff line
@@ -51,26 +51,17 @@ const PtNodeParams Ver4PatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProce
    const int parentPos =
            DynamicPtReadingUtils::getParentPtNodePos(parentPosOffset, headPos);
    int codePoints[MAX_WORD_LENGTH];
    const int codePonitCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
            dictBuf, flags, MAX_WORD_LENGTH, mHeaderPolicy->getCodePointTable(), codePoints, &pos);
    // Code point table is not used for ver4 dictionaries.
    const int codePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
            dictBuf, flags, MAX_WORD_LENGTH, nullptr /* codePointTable */, codePoints, &pos);
    int terminalIdFieldPos = NOT_A_DICT_POS;
    int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
    int probability = NOT_A_PROBABILITY;
    if (PatriciaTrieReadingUtils::isTerminal(flags)) {
        terminalIdFieldPos = pos;
        if (usesAdditionalBuffer) {
            terminalIdFieldPos += mBuffer->getOriginalBufferSize();
        }
        terminalId = Ver4PatriciaTrieReadingUtils::getTerminalIdAndAdvancePosition(dictBuf, &pos);
        // TODO: Quit reading probability here.
        const ProbabilityEntry probabilityEntry =
                mLanguageModelDictContent->getProbabilityEntry(terminalId);
        if (probabilityEntry.hasHistoricalInfo()) {
            probability = ForgettingCurveUtils::decodeProbability(
                    probabilityEntry.getHistoricalInfo(), mHeaderPolicy);
        } else {
            probability = probabilityEntry.getProbability();
        }
    }
    int childrenPosFieldPos = pos;
    if (usesAdditionalBuffer) {
@@ -91,8 +82,8 @@ const PtNodeParams Ver4PatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProce
        // The destination position is stored at the same place as the parent position.
        return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(parentPos, newSiblingNodePos);
    } else {
        return PtNodeParams(headPos, flags, parentPos, codePonitCount, codePoints,
                terminalIdFieldPos, terminalId, probability, childrenPosFieldPos, childrenPos,
        return PtNodeParams(headPos, flags, parentPos, codePointCount, codePoints,
                terminalIdFieldPos, terminalId, NOT_A_PROBABILITY, childrenPosFieldPos, childrenPos,
                newSiblingNodePos);
    }
}
+3 −8
Original line number Diff line number Diff line
@@ -29,15 +29,12 @@ class LanguageModelDictContent;

/*
 * This class is used for helping to read nodes of ver4 patricia trie. This class handles moved
 * node and reads node attributes including probability form language model.
 * node and reads node attributes.
 */
class Ver4PatriciaTrieNodeReader : public PtNodeReader {
 public:
    Ver4PatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer,
            const LanguageModelDictContent *const languageModelDictContent,
            const HeaderPolicy *const headerPolicy)
            : mBuffer(buffer), mLanguageModelDictContent(languageModelDictContent),
              mHeaderPolicy(headerPolicy) {}
    explicit Ver4PatriciaTrieNodeReader(const BufferWithExtendableBuffer *const buffer)
            : mBuffer(buffer) {}

    ~Ver4PatriciaTrieNodeReader() {}

@@ -50,8 +47,6 @@ class Ver4PatriciaTrieNodeReader : public PtNodeReader {
    DISALLOW_COPY_AND_ASSIGN(Ver4PatriciaTrieNodeReader);

    const BufferWithExtendableBuffer *const mBuffer;
    const LanguageModelDictContent *const mLanguageModelDictContent;
    const HeaderPolicy *const mHeaderPolicy;

    const PtNodeParams fetchPtNodeInfoFromBufferAndProcessMovedPtNode(const int ptNodePos,
            const int siblingNodePos) const;
+1 −7
Original line number Diff line number Diff line
@@ -56,13 +56,7 @@ void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const d
        if (!ptNodeParams.isValid()) {
            break;
        }
        bool isTerminal = ptNodeParams.isTerminal() && !ptNodeParams.isDeleted();
        if (isTerminal && mHeaderPolicy->isDecayingDict()) {
            // A DecayingDict may have a terminal PtNode that has a terminal DicNode whose
            // probability is NOT_A_PROBABILITY. In such case, we don't want to treat it as a
            // valid terminal DicNode.
            isTerminal = ptNodeParams.getProbability() != NOT_A_PROBABILITY;
        }
        const bool isTerminal = ptNodeParams.isTerminal() && !ptNodeParams.isDeleted();
        const int wordId = isTerminal ? ptNodeParams.getTerminalId() : NOT_A_WORD_ID;
        childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getChildrenPos(),
                wordId, ptNodeParams.getCodePointArrayView());
+1 −2
Original line number Diff line number Diff line
@@ -45,8 +45,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
              mDictBuffer(mBuffers->getWritableTrieBuffer()),
              mShortcutPolicy(mBuffers->getMutableShortcutDictContent(),
                      mBuffers->getTerminalPositionLookupTable()),
              mNodeReader(mDictBuffer, mBuffers->getLanguageModelDictContent(), mHeaderPolicy),
              mPtNodeArrayReader(mDictBuffer),
              mNodeReader(mDictBuffer), mPtNodeArrayReader(mDictBuffer),
              mNodeWriter(mDictBuffer, mBuffers.get(), mHeaderPolicy, &mNodeReader,
                      &mPtNodeArrayReader, &mShortcutPolicy),
              mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter),
Loading