Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 6b74f516 authored by Keisuke Kuroyanagi's avatar Keisuke Kuroyanagi Committed by Android (Google) Code Review
Browse files

Merge "Refactoring: Use UnigramProperty to add/update unigram."

parents 4f5ea4df b636e25e
Loading
Loading
Loading
Loading
+41 −44
Original line number Diff line number Diff line
@@ -16,6 +16,7 @@

#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h"

#include "suggest/core/dictionary/property/unigram_property.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
@@ -29,9 +30,8 @@ const int DynamicPtUpdatingHelper::CHILDREN_POSITION_FIELD_SIZE = 3;

bool DynamicPtUpdatingHelper::addUnigramWord(
        DynamicPtReadingHelper *const readingHelper,
        const int *const wordCodePoints, const int codePointCount, const int probability,
        const bool isNotAWord, const bool isBlacklisted, const int timestamp,
        bool *const outAddedNewUnigram) {
        const int *const wordCodePoints, const int codePointCount,
        const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram) {
    int parentPos = NOT_A_DICT_POS;
    while (!readingHelper->isEnd()) {
        const PtNodeParams ptNodeParams(readingHelper->getPtNodeParams());
@@ -53,20 +53,18 @@ bool DynamicPtUpdatingHelper::addUnigramWord(
            if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(ptNodeParams, j,
                    wordCodePoints[matchedCodePointCount + j])) {
                *outAddedNewUnigram = true;
                return reallocatePtNodeAndAddNewPtNodes(&ptNodeParams, j, isNotAWord, isBlacklisted,
                        probability, timestamp, wordCodePoints + matchedCodePointCount,
                return reallocatePtNodeAndAddNewPtNodes(&ptNodeParams, j, unigramProperty,
                        wordCodePoints + matchedCodePointCount,
                        codePointCount - matchedCodePointCount);
            }
        }
        // All characters are matched.
        if (codePointCount == readingHelper->getTotalCodePointCount(ptNodeParams)) {
            return setPtNodeProbability(&ptNodeParams, isNotAWord, isBlacklisted, probability,
                    timestamp, outAddedNewUnigram);
            return setPtNodeProbability(&ptNodeParams, unigramProperty, outAddedNewUnigram);
        }
        if (!ptNodeParams.hasChildren()) {
            *outAddedNewUnigram = true;
            return createChildrenPtNodeArrayAndAChildPtNode(&ptNodeParams,
                    isNotAWord, isBlacklisted, probability, timestamp,
            return createChildrenPtNodeArrayAndAChildPtNode(&ptNodeParams, unigramProperty,
                    wordCodePoints + readingHelper->getTotalCodePointCount(ptNodeParams),
                    codePointCount - readingHelper->getTotalCodePointCount(ptNodeParams));
        }
@@ -83,7 +81,7 @@ bool DynamicPtUpdatingHelper::addUnigramWord(
    return createAndInsertNodeIntoPtNodeArray(parentPos,
            wordCodePoints + readingHelper->getPrevTotalCodePointCount(),
            codePointCount - readingHelper->getPrevTotalCodePointCount(),
            isNotAWord, isBlacklisted, probability, timestamp, &pos);
            unigramProperty, &pos);
}

bool DynamicPtUpdatingHelper::addBigramWords(const int word0Pos, const int word1Pos,
@@ -115,36 +113,34 @@ bool DynamicPtUpdatingHelper::addShortcutTarget(const int wordPos,

bool DynamicPtUpdatingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos,
        const int *const nodeCodePoints, const int nodeCodePointCount,
        const bool isNotAWord, const bool isBlacklisted, const int probability,
        const int timestamp,  int *const forwardLinkFieldPos) {
        const UnigramProperty *const unigramProperty, int *const forwardLinkFieldPos) {
    const int newPtNodeArrayPos = mBuffer->getTailPosition();
    if (!DynamicPtWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
            newPtNodeArrayPos, forwardLinkFieldPos)) {
        return false;
    }
    return createNewPtNodeArrayWithAChildPtNode(parentPos, nodeCodePoints, nodeCodePointCount,
            isNotAWord, isBlacklisted, probability, timestamp);
            unigramProperty);
}

bool DynamicPtUpdatingHelper::setPtNodeProbability(
        const PtNodeParams *const originalPtNodeParams, const bool isNotAWord,
        const bool isBlacklisted, const int probability, const int timestamp,
        bool *const outAddedNewUnigram) {
bool DynamicPtUpdatingHelper::setPtNodeProbability(const PtNodeParams *const originalPtNodeParams,
        const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram) {
    if (originalPtNodeParams->isTerminal()) {
        // Overwrites the probability.
        *outAddedNewUnigram = false;
        return mPtNodeWriter->updatePtNodeProbability(originalPtNodeParams, probability, timestamp);
        return mPtNodeWriter->updatePtNodeUnigramProperty(originalPtNodeParams, unigramProperty);
    } else {
        // Make the node terminal and write the probability.
        *outAddedNewUnigram = true;
        const int movedPos = mBuffer->getTailPosition();
        int writingPos = movedPos;
        const PtNodeParams ptNodeParamsToWrite(getUpdatedPtNodeParams(originalPtNodeParams,
                isNotAWord, isBlacklisted, true /* isTerminal */,
                originalPtNodeParams->getParentPos(), originalPtNodeParams->getCodePointCount(),
                originalPtNodeParams->getCodePoints(), probability));
                unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(),
                true /* isTerminal */, originalPtNodeParams->getParentPos(),
                originalPtNodeParams->getCodePointCount(), originalPtNodeParams->getCodePoints(),
                unigramProperty->getProbability()));
        if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
                timestamp, &writingPos)) {
                unigramProperty, &writingPos)) {
            return false;
        }
        if (!mPtNodeWriter->markPtNodeAsMoved(originalPtNodeParams, movedPos, movedPos)) {
@@ -155,31 +151,30 @@ bool DynamicPtUpdatingHelper::setPtNodeProbability(
}

bool DynamicPtUpdatingHelper::createChildrenPtNodeArrayAndAChildPtNode(
        const PtNodeParams *const parentPtNodeParams, const bool isNotAWord,
        const bool isBlacklisted, const int probability, const int timestamp,
        const PtNodeParams *const parentPtNodeParams, const UnigramProperty *const unigramProperty,
        const int *const codePoints, const int codePointCount) {
    const int newPtNodeArrayPos = mBuffer->getTailPosition();
    if (!mPtNodeWriter->updateChildrenPosition(parentPtNodeParams, newPtNodeArrayPos)) {
        return false;
    }
    return createNewPtNodeArrayWithAChildPtNode(parentPtNodeParams->getHeadPos(), codePoints,
            codePointCount, isNotAWord, isBlacklisted, probability, timestamp);
            codePointCount, unigramProperty);
}

bool DynamicPtUpdatingHelper::createNewPtNodeArrayWithAChildPtNode(
        const int parentPtNodePos, const int *const nodeCodePoints, const int nodeCodePointCount,
        const bool isNotAWord, const bool isBlacklisted, const int probability,
        const int timestamp) {
        const UnigramProperty *const unigramProperty) {
    int writingPos = mBuffer->getTailPosition();
    if (!DynamicPtWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer,
            1 /* arraySize */, &writingPos)) {
        return false;
    }
    const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
            isNotAWord, isBlacklisted, true /* isTerminal */,
            parentPtNodePos, nodeCodePointCount, nodeCodePoints, probability));
    if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite, timestamp,
            &writingPos)) {
            unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(), true /* isTerminal */,
            parentPtNodePos, nodeCodePointCount, nodeCodePoints,
            unigramProperty->getProbability()));
    if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
            unigramProperty, &writingPos)) {
        return false;
    }
    if (!DynamicPtWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
@@ -192,13 +187,13 @@ bool DynamicPtUpdatingHelper::createNewPtNodeArrayWithAChildPtNode(
// Returns whether the dictionary update succeeded.
bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
        const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount,
        const bool isNotAWord, const bool isBlacklisted, const int probabilityOfNewPtNode,
        const int timestamp, const int *const newNodeCodePoints, const int newNodeCodePointCount) {
        const UnigramProperty *const unigramProperty, const int *const newNodeCodePoints,
        const int newNodeCodePointCount) {
    // When addsExtraChild is true, split the reallocating PtNode and add new child.
    // Reallocating PtNode: abcde, newNode: abcxy.
    // abc (1st, not terminal) __ de (2nd)
    //                         \_ xy (extra child, terminal)
    // Otherwise, this method makes 1st part terminal and write probabilityOfNewPtNode.
    // Otherwise, this method makes 1st part terminal and write information in unigramProperty.
    // Reallocating PtNode: abcde, newNode: abc.
    // abc (1st, terminal) __ de (2nd)
    const bool addsExtraChild = newNodeCodePointCount > overlappingCodePointCount;
@@ -216,11 +211,12 @@ bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
        }
    } else {
        const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
                isNotAWord, isBlacklisted, true /* isTerminal */,
                reallocatingPtNodeParams->getParentPos(), overlappingCodePointCount,
                reallocatingPtNodeParams->getCodePoints(), probabilityOfNewPtNode));
                unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(),
                true /* isTerminal */, reallocatingPtNodeParams->getParentPos(),
                overlappingCodePointCount, reallocatingPtNodeParams->getCodePoints(),
                unigramProperty->getProbability()));
        if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
                timestamp, &writingPos)) {
                unigramProperty, &writingPos)) {
            return false;
        }
    }
@@ -244,11 +240,12 @@ bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
    }
    if (addsExtraChild) {
        const PtNodeParams extraChildPtNodeParams(getPtNodeParamsForNewPtNode(
                isNotAWord, isBlacklisted, true /* isTerminal */,
                firstPartOfReallocatedPtNodePos, newNodeCodePointCount - overlappingCodePointCount,
                newNodeCodePoints + overlappingCodePointCount, probabilityOfNewPtNode));
                unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(),
                true /* isTerminal */, firstPartOfReallocatedPtNodePos,
                newNodeCodePointCount - overlappingCodePointCount,
                newNodeCodePoints + overlappingCodePointCount, unigramProperty->getProbability()));
        if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&extraChildPtNodeParams,
                timestamp, &writingPos)) {
                unigramProperty, &writingPos)) {
            return false;
        }
    }
@@ -269,8 +266,8 @@ bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
}

const PtNodeParams DynamicPtUpdatingHelper::getUpdatedPtNodeParams(
        const PtNodeParams *const originalPtNodeParams, const bool isNotAWord,
        const bool isBlacklisted, const bool isTerminal, const int parentPos,
        const PtNodeParams *const originalPtNodeParams,
        const bool isNotAWord, const bool isBlacklisted, const bool isTerminal, const int parentPos,
        const int codePointCount, const int *const codePoints, const int probability) const {
    const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags(
            isBlacklisted, isNotAWord, isTerminal, originalPtNodeParams->hasShortcutTargets(),
+11 −14
Original line number Diff line number Diff line
@@ -26,6 +26,7 @@ class BufferWithExtendableBuffer;
class DynamicPtReadingHelper;
class PtNodeReader;
class PtNodeWriter;
class UnigramProperty;

class DynamicPtUpdatingHelper {
 public:
@@ -37,9 +38,8 @@ class DynamicPtUpdatingHelper {

    // Add a word to the dictionary. If the word already exists, update the probability.
    bool addUnigramWord(DynamicPtReadingHelper *const readingHelper,
            const int *const wordCodePoints, const int codePointCount, const int probability,
            const bool isNotAWord, const bool isBlacklisted, const int timestamp,
            bool *const outAddedNewUnigram);
            const int *const wordCodePoints, const int codePointCount,
            const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram);

    // Add a bigram relation from word0Pos to word1Pos.
    bool addBigramWords(const int word0Pos, const int word1Pos, const int probability,
@@ -62,25 +62,22 @@ class DynamicPtUpdatingHelper {
    PtNodeWriter *const mPtNodeWriter;

    bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints,
            const int nodeCodePointCount, const bool isNotAWord, const bool isBlacklisted,
            const int probability, const int timestamp, int *const forwardLinkFieldPos);
            const int nodeCodePointCount, const UnigramProperty *const unigramProperty,
            int *const forwardLinkFieldPos);

    bool setPtNodeProbability(const PtNodeParams *const originalPtNodeParams, const bool isNotAWord,
            const bool isBlacklisted, const int probability, const int timestamp,
            bool *const outAddedNewUnigram);
    bool setPtNodeProbability(const PtNodeParams *const originalPtNodeParams,
            const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram);

    bool createChildrenPtNodeArrayAndAChildPtNode(const PtNodeParams *const parentPtNodeParams,
            const bool isNotAWord, const bool isBlacklisted, const int probability,
            const int timestamp, const int *const codePoints, const int codePointCount);
            const UnigramProperty *const unigramProperty, const int *const codePoints,
            const int codePointCount);

    bool createNewPtNodeArrayWithAChildPtNode(const int parentPos, const int *const nodeCodePoints,
            const int nodeCodePointCount, const bool isNotAWord, const bool isBlacklisted,
            const int probability, const int timestamp);
            const int nodeCodePointCount, const UnigramProperty *const unigramProperty);

    bool reallocatePtNodeAndAddNewPtNodes(
            const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount,
            const bool isNotAWord, const bool isBlacklisted, const int probabilityOfNewPtNode,
            const int timestamp, const int *const newNodeCodePoints,
            const UnigramProperty *const unigramProperty, const int *const newNodeCodePoints,
            const int newNodeCodePointCount);

    const PtNodeParams getUpdatedPtNodeParams(const PtNodeParams *const originalPtNodeParams,
+5 −3
Original line number Diff line number Diff line
@@ -24,6 +24,8 @@

namespace latinime {

class UnigramProperty;

// Interface class used to write PtNode information.
class PtNodeWriter {
 public:
@@ -51,8 +53,8 @@ class PtNodeWriter {
    virtual bool markPtNodeAsWillBecomeNonTerminal(
            const PtNodeParams *const toBeUpdatedPtNodeParams) = 0;

    virtual bool updatePtNodeProbability(const PtNodeParams *const toBeUpdatedPtNodeParams,
            const int probability, const int timestamp) = 0;
    virtual bool updatePtNodeUnigramProperty(const PtNodeParams *const toBeUpdatedPtNodeParams,
            const UnigramProperty *const unigramProperty) = 0;

    virtual bool updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC(
            const PtNodeParams *const toBeUpdatedPtNodeParams,
@@ -65,7 +67,7 @@ class PtNodeWriter {
            int *const ptNodeWritingPos) = 0;

    virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
            const int timestamp, int *const ptNodeWritingPos) = 0;
            const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos) = 0;

    virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
            const PtNodeParams *const targetPtNodeParam, const int probability, const int timestamp,
+16 −10
Original line number Diff line number Diff line
@@ -16,6 +16,7 @@

#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h"

#include "suggest/core/dictionary/property/unigram_property.h"
#include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h"
@@ -133,9 +134,11 @@ bool Ver4PatriciaTrieNodeWriter::markPtNodeAsWillBecomeNonTerminal(
            &writingPos);
}

bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbability(
        const PtNodeParams *const toBeUpdatedPtNodeParams, const int newProbability,
        const int timestamp) {
bool Ver4PatriciaTrieNodeWriter::updatePtNodeUnigramProperty(
        const PtNodeParams *const toBeUpdatedPtNodeParams,
        const UnigramProperty *const unigramProperty) {
    // Update probability and historical information.
    // TODO: Update other information in the unigram property.
    if (!toBeUpdatedPtNodeParams->isTerminal()) {
        return false;
    }
@@ -143,7 +146,7 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbability(
            mBuffers->getProbabilityDictContent()->getProbabilityEntry(
                    toBeUpdatedPtNodeParams->getTerminalId());
    const ProbabilityEntry probabilityEntry = createUpdatedEntryFrom(&originalProbabilityEntry,
            newProbability, timestamp);
            unigramProperty);
    return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(
            toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry);
}
@@ -204,7 +207,8 @@ bool Ver4PatriciaTrieNodeWriter::writePtNodeAndAdvancePosition(


bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition(
        const PtNodeParams *const ptNodeParams, const int timestamp, int *const ptNodeWritingPos) {
        const PtNodeParams *const ptNodeParams, const UnigramProperty *const unigramProperty,
        int *const ptNodeWritingPos) {
    int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
    if (!writePtNodeAndGetTerminalIdAndAdvancePosition(ptNodeParams, &terminalId,
            ptNodeWritingPos)) {
@@ -213,7 +217,7 @@ bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition(
    // Write probability.
    ProbabilityEntry newProbabilityEntry;
    const ProbabilityEntry probabilityEntryToWrite = createUpdatedEntryFrom(
            &newProbabilityEntry, ptNodeParams->getProbability(), timestamp);
            &newProbabilityEntry, unigramProperty);
    return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(terminalId,
            &probabilityEntryToWrite);
}
@@ -379,18 +383,20 @@ bool Ver4PatriciaTrieNodeWriter::writePtNodeAndGetTerminalIdAndAdvancePosition(
}

const ProbabilityEntry Ver4PatriciaTrieNodeWriter::createUpdatedEntryFrom(
        const ProbabilityEntry *const originalProbabilityEntry, const int newProbability,
        const int timestamp) const {
        const ProbabilityEntry *const originalProbabilityEntry,
        const UnigramProperty *const unigramProperty) const {
    // TODO: Consolidate historical info and probability.
    if (mHeaderPolicy->hasHistoricalInfoOfWords()) {
        const HistoricalInfo updatedHistoricalInfo =
                ForgettingCurveUtils::createUpdatedHistoricalInfo(
                        originalProbabilityEntry->getHistoricalInfo(), newProbability, timestamp,
                        originalProbabilityEntry->getHistoricalInfo(),
                        unigramProperty->getProbability(), unigramProperty->getTimestamp(),
                        mHeaderPolicy);
        return originalProbabilityEntry->createEntryWithUpdatedHistoricalInfo(
                &updatedHistoricalInfo);
    } else {
        return originalProbabilityEntry->createEntryWithUpdatedProbability(newProbability);
        return originalProbabilityEntry->createEntryWithUpdatedProbability(
                unigramProperty->getProbability());
    }
}

+7 −6
Original line number Diff line number Diff line
@@ -57,8 +57,8 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
    virtual bool markPtNodeAsWillBecomeNonTerminal(
            const PtNodeParams *const toBeUpdatedPtNodeParams);

    virtual bool updatePtNodeProbability(const PtNodeParams *const toBeUpdatedPtNodeParams,
            const int newProbability, const int timestamp);
    virtual bool updatePtNodeUnigramProperty(const PtNodeParams *const toBeUpdatedPtNodeParams,
            const UnigramProperty *const unigramProperty);

    virtual bool updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC(
            const PtNodeParams *const toBeUpdatedPtNodeParams, bool *const outNeedsToKeepPtNode);
@@ -73,7 +73,7 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
            int *const ptNodeWritingPos);

    virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams,
            const int timestamp, int *const ptNodeWritingPos);
            const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos);

    virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams,
            const PtNodeParams *const targetPtNodeParam, const int probability, const int timestamp,
@@ -102,11 +102,12 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter {
            const PtNodeParams *const ptNodeParams, int *const outTerminalId,
            int *const ptNodeWritingPos);

    // Create updated probability entry using given probability and timestamp. In addition to the
    // Create updated probability entry using given unigram property. In addition to the
    // probability, this method updates historical information if needed.
    // TODO: Update flags belonging to the unigram property.
    const ProbabilityEntry createUpdatedEntryFrom(
            const ProbabilityEntry *const originalProbabilityEntry, const int newProbability,
            const int timestamp) const;
            const ProbabilityEntry *const originalProbabilityEntry,
            const UnigramProperty *const unigramProperty) const;

    bool updatePtNodeFlags(const int ptNodePos, const bool isBlacklisted, const bool isNotAWord,
            const bool isTerminal, const bool hasShortcutTargets, const bool hasBigrams,
Loading