Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 89a074fa authored by Keisuke Kuroyanagi's avatar Keisuke Kuroyanagi
Browse files

Use IntArrayView in DynamicPtUpdatingHelper.

Change-Id: Ifa185eba7dd9abee1b35e49fe559be1042ca63d7
parent 7542b8d8
Loading
Loading
Loading
Loading
+4 −4
Original line number Diff line number Diff line
@@ -268,8 +268,8 @@ bool Ver4PatriciaTriePolicy::addUnigramEntry(const CodePointArrayView wordCodePo
        return false;
    }
    const CodePointArrayView codePointArrayView(codePointsToAdd, codePointCountToAdd);
    if (mUpdatingHelper.addUnigramWord(&readingHelper, codePointArrayView.data(),
            codePointArrayView.size(), unigramProperty, &addedNewUnigram)) {
    if (mUpdatingHelper.addUnigramWord(&readingHelper, codePointArrayView, unigramProperty,
            &addedNewUnigram)) {
        if (addedNewUnigram && !unigramProperty->representsBeginningOfSentence()) {
            mUnigramCount++;
        }
@@ -283,8 +283,8 @@ bool Ver4PatriciaTriePolicy::addUnigramEntry(const CodePointArrayView wordCodePo
            }
            for (const auto &shortcut : unigramProperty->getShortcuts()) {
                if (!mUpdatingHelper.addShortcutTarget(wordPos,
                        shortcut.getTargetCodePoints()->data(),
                        shortcut.getTargetCodePoints()->size(), shortcut.getProbability())) {
                        CodePointArrayView(*shortcut.getTargetCodePoints()),
                        shortcut.getProbability())) {
                    AKLOGE("Cannot add new shortcut target. PtNodePos: %d, length: %zd, "
                            "probability: %d", wordPos, shortcut.getTargetCodePoints()->size(),
                            shortcut.getProbability());
+2 −2
Original line number Diff line number Diff line
@@ -218,9 +218,9 @@ int DynamicPtReadingHelper::getCodePointsAndProbabilityAndReturnCodePointCount(
}

int DynamicPtReadingHelper::getTerminalPtNodePositionOfWord(const int *const inWord,
        const int length, const bool forceLowerCaseSearch) {
        const size_t length, const bool forceLowerCaseSearch) {
    int searchCodePoints[length];
    for (int i = 0; i < length; ++i) {
    for (size_t i = 0; i < length; ++i) {
        searchCodePoints[i] = forceLowerCaseSearch ? CharUtils::toLowerCase(inWord[i]) : inWord[i];
    }
    while (!isEnd()) {
+4 −4
Original line number Diff line number Diff line
@@ -138,12 +138,12 @@ class DynamicPtReadingHelper {
    }

    // Returns the code point count, excluding the last read node's code points.
    AK_FORCE_INLINE int getPrevTotalCodePointCount() const {
    AK_FORCE_INLINE size_t getPrevTotalCodePointCount() const {
        return mReadingState.mTotalCodePointCountSinceInitialization;
    }

    // Returns the code point count, including the last read node's code points.
    AK_FORCE_INLINE int getTotalCodePointCount(const PtNodeParams &ptNodeParams) const {
    AK_FORCE_INLINE size_t getTotalCodePointCount(const PtNodeParams &ptNodeParams) const {
        return mReadingState.mTotalCodePointCountSinceInitialization
                + ptNodeParams.getCodePointCount();
    }
@@ -214,7 +214,7 @@ class DynamicPtReadingHelper {
    int getCodePointsAndProbabilityAndReturnCodePointCount(const int maxCodePointCount,
            int *const outCodePoints, int *const outUnigramProbability);

    int getTerminalPtNodePositionOfWord(const int *const inWord, const int length,
    int getTerminalPtNodePositionOfWord(const int *const inWord, const size_t length,
            const bool forceLowerCaseSearch);

 private:
@@ -234,7 +234,7 @@ class DynamicPtReadingHelper {
        int mPos;
        // Remaining node count in the current array.
        int mRemainingPtNodeCountInThisArray;
        int mTotalCodePointCountSinceInitialization;
        size_t mTotalCodePointCountSinceInitialization;
        // Counter of PtNodes used to avoid infinite loops caused by broken or malicious links.
        int mTotalPtNodeIndexInThisArrayChain;
        // Counter of PtNode arrays used to avoid infinite loops caused by cyclic links of empty
+48 −57
Original line number Diff line number Diff line
@@ -28,17 +28,16 @@ namespace latinime {

const int DynamicPtUpdatingHelper::CHILDREN_POSITION_FIELD_SIZE = 3;

bool DynamicPtUpdatingHelper::addUnigramWord(
        DynamicPtReadingHelper *const readingHelper,
        const int *const wordCodePoints, const int codePointCount,
        const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram) {
bool DynamicPtUpdatingHelper::addUnigramWord(DynamicPtReadingHelper *const readingHelper,
        const CodePointArrayView wordCodePoints, const UnigramProperty *const unigramProperty,
        bool *const outAddedNewUnigram) {
    int parentPos = NOT_A_DICT_POS;
    while (!readingHelper->isEnd()) {
        const PtNodeParams ptNodeParams(readingHelper->getPtNodeParams());
        if (!ptNodeParams.isValid()) {
            break;
        }
        const int matchedCodePointCount = readingHelper->getPrevTotalCodePointCount();
        const size_t matchedCodePointCount = readingHelper->getPrevTotalCodePointCount();
        if (!readingHelper->isMatchedCodePoint(ptNodeParams, 0 /* index */,
                wordCodePoints[matchedCodePointCount])) {
            // The first code point is different from target code point. Skip this node and read
@@ -47,26 +46,25 @@ bool DynamicPtUpdatingHelper::addUnigramWord(
            continue;
        }
        // Check following merged node code points.
        const int nodeCodePointCount = ptNodeParams.getCodePointCount();
        for (int j = 1; j < nodeCodePointCount; ++j) {
            const int nextIndex = matchedCodePointCount + j;
            if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(ptNodeParams, j,
        const size_t nodeCodePointCount = ptNodeParams.getCodePointArrayView().size();
        for (size_t j = 1; j < nodeCodePointCount; ++j) {
            const size_t nextIndex = matchedCodePointCount + j;
            if (nextIndex >= wordCodePoints.size()
                    || !readingHelper->isMatchedCodePoint(ptNodeParams, j,
                            wordCodePoints[matchedCodePointCount + j])) {
                *outAddedNewUnigram = true;
                return reallocatePtNodeAndAddNewPtNodes(&ptNodeParams, j, unigramProperty,
                        wordCodePoints + matchedCodePointCount,
                        codePointCount - matchedCodePointCount);
                        wordCodePoints.skip(matchedCodePointCount));
            }
        }
        // All characters are matched.
        if (codePointCount == readingHelper->getTotalCodePointCount(ptNodeParams)) {
        if (wordCodePoints.size() == readingHelper->getTotalCodePointCount(ptNodeParams)) {
            return setPtNodeProbability(&ptNodeParams, unigramProperty, outAddedNewUnigram);
        }
        if (!ptNodeParams.hasChildren()) {
            *outAddedNewUnigram = true;
            return createChildrenPtNodeArrayAndAChildPtNode(&ptNodeParams, unigramProperty,
                    wordCodePoints + readingHelper->getTotalCodePointCount(ptNodeParams),
                    codePointCount - readingHelper->getTotalCodePointCount(ptNodeParams));
                    wordCodePoints.skip(readingHelper->getTotalCodePointCount(ptNodeParams)));
        }
        // Advance to the children nodes.
        parentPos = ptNodeParams.getHeadPos();
@@ -79,9 +77,8 @@ bool DynamicPtUpdatingHelper::addUnigramWord(
    int pos = readingHelper->getPosOfLastForwardLinkField();
    *outAddedNewUnigram = true;
    return createAndInsertNodeIntoPtNodeArray(parentPos,
            wordCodePoints + readingHelper->getPrevTotalCodePointCount(),
            codePointCount - readingHelper->getPrevTotalCodePointCount(),
            unigramProperty, &pos);
            wordCodePoints.skip(readingHelper->getPrevTotalCodePointCount()), unigramProperty,
            &pos);
}

bool DynamicPtUpdatingHelper::addNgramEntry(const PtNodePosArrayView prevWordsPtNodePos,
@@ -120,23 +117,21 @@ bool DynamicPtUpdatingHelper::removeNgramEntry(const PtNodePosArrayView prevWord
}

bool DynamicPtUpdatingHelper::addShortcutTarget(const int wordPos,
        const int *const targetCodePoints, const int targetCodePointCount,
        const int shortcutProbability) {
        const CodePointArrayView targetCodePoints, const int shortcutProbability) {
    const PtNodeParams ptNodeParams(mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(wordPos));
    return mPtNodeWriter->addShortcutTarget(&ptNodeParams, targetCodePoints, targetCodePointCount,
            shortcutProbability);
    return mPtNodeWriter->addShortcutTarget(&ptNodeParams, targetCodePoints.data(),
            targetCodePoints.size(), shortcutProbability);
}

bool DynamicPtUpdatingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos,
        const int *const nodeCodePoints, const int nodeCodePointCount,
        const UnigramProperty *const unigramProperty, int *const forwardLinkFieldPos) {
        const CodePointArrayView ptNodeCodePoints, const UnigramProperty *const unigramProperty,
        int *const forwardLinkFieldPos) {
    const int newPtNodeArrayPos = mBuffer->getTailPosition();
    if (!DynamicPtWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
            newPtNodeArrayPos, forwardLinkFieldPos)) {
        return false;
    }
    return createNewPtNodeArrayWithAChildPtNode(parentPos, nodeCodePoints, nodeCodePointCount,
            unigramProperty);
    return createNewPtNodeArrayWithAChildPtNode(parentPos, ptNodeCodePoints, unigramProperty);
}

bool DynamicPtUpdatingHelper::setPtNodeProbability(const PtNodeParams *const originalPtNodeParams,
@@ -153,8 +148,7 @@ bool DynamicPtUpdatingHelper::setPtNodeProbability(const PtNodeParams *const ori
        const PtNodeParams ptNodeParamsToWrite(getUpdatedPtNodeParams(originalPtNodeParams,
                unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(),
                true /* isTerminal */, originalPtNodeParams->getParentPos(),
                originalPtNodeParams->getCodePointCount(), originalPtNodeParams->getCodePoints(),
                unigramProperty->getProbability()));
                originalPtNodeParams->getCodePointArrayView(), unigramProperty->getProbability()));
        if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
                unigramProperty, &writingPos)) {
            return false;
@@ -168,17 +162,17 @@ bool DynamicPtUpdatingHelper::setPtNodeProbability(const PtNodeParams *const ori

bool DynamicPtUpdatingHelper::createChildrenPtNodeArrayAndAChildPtNode(
        const PtNodeParams *const parentPtNodeParams, const UnigramProperty *const unigramProperty,
        const int *const codePoints, const int codePointCount) {
        const CodePointArrayView codePoints) {
    const int newPtNodeArrayPos = mBuffer->getTailPosition();
    if (!mPtNodeWriter->updateChildrenPosition(parentPtNodeParams, newPtNodeArrayPos)) {
        return false;
    }
    return createNewPtNodeArrayWithAChildPtNode(parentPtNodeParams->getHeadPos(), codePoints,
            codePointCount, unigramProperty);
            unigramProperty);
}

bool DynamicPtUpdatingHelper::createNewPtNodeArrayWithAChildPtNode(
        const int parentPtNodePos, const int *const nodeCodePoints, const int nodeCodePointCount,
        const int parentPtNodePos, const CodePointArrayView ptNodeCodePoints,
        const UnigramProperty *const unigramProperty) {
    int writingPos = mBuffer->getTailPosition();
    if (!DynamicPtWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer,
@@ -187,8 +181,7 @@ bool DynamicPtUpdatingHelper::createNewPtNodeArrayWithAChildPtNode(
    }
    const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
            unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(), true /* isTerminal */,
            parentPtNodePos, nodeCodePointCount, nodeCodePoints,
            unigramProperty->getProbability()));
            parentPtNodePos, ptNodeCodePoints, unigramProperty->getProbability()));
    if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
            unigramProperty, &writingPos)) {
        return false;
@@ -202,9 +195,9 @@ bool DynamicPtUpdatingHelper::createNewPtNodeArrayWithAChildPtNode(

// Returns whether updating the dictionary succeeded.
bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
        const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount,
        const UnigramProperty *const unigramProperty, const int *const newNodeCodePoints,
        const int newNodeCodePointCount) {
        const PtNodeParams *const reallocatingPtNodeParams, const size_t overlappingCodePointCount,
        const UnigramProperty *const unigramProperty,
        const CodePointArrayView newPtNodeCodePoints) {
    // When addsExtraChild is true, split the reallocating PtNode and add new child.
    // Reallocating PtNode: abcde, newNode: abcxy.
    // abc (1st, not terminal) __ de (2nd)
@@ -212,16 +205,18 @@ bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
    // Otherwise, this method makes 1st part terminal and write information in unigramProperty.
    // Reallocating PtNode: abcde, newNode: abc.
    // abc (1st, terminal) __ de (2nd)
    const bool addsExtraChild = newNodeCodePointCount > overlappingCodePointCount;
    const bool addsExtraChild = newPtNodeCodePoints.size() > overlappingCodePointCount;
    const int firstPartOfReallocatedPtNodePos = mBuffer->getTailPosition();
    int writingPos = firstPartOfReallocatedPtNodePos;
    // Write the 1st part of the reallocating node. The children position will be updated later
    // with actual children position.
    const CodePointArrayView firstPtNodeCodePoints =
            reallocatingPtNodeParams->getCodePointArrayView().limit(overlappingCodePointCount);
    if (addsExtraChild) {
        const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
                false /* isNotAWord */, false /* isBlacklisted */, false /* isTerminal */,
                reallocatingPtNodeParams->getParentPos(), overlappingCodePointCount,
                reallocatingPtNodeParams->getCodePoints(), NOT_A_PROBABILITY));
                reallocatingPtNodeParams->getParentPos(), firstPtNodeCodePoints,
                NOT_A_PROBABILITY));
        if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&ptNodeParamsToWrite, &writingPos)) {
            return false;
        }
@@ -229,8 +224,7 @@ bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
        const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
                unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(),
                true /* isTerminal */, reallocatingPtNodeParams->getParentPos(),
                overlappingCodePointCount, reallocatingPtNodeParams->getCodePoints(),
                unigramProperty->getProbability()));
                firstPtNodeCodePoints, unigramProperty->getProbability()));
        if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
                unigramProperty, &writingPos)) {
            return false;
@@ -248,8 +242,7 @@ bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
    const PtNodeParams childPartPtNodeParams(getUpdatedPtNodeParams(reallocatingPtNodeParams,
            reallocatingPtNodeParams->isNotAWord(), reallocatingPtNodeParams->isBlacklisted(),
            reallocatingPtNodeParams->isTerminal(), firstPartOfReallocatedPtNodePos,
            reallocatingPtNodeParams->getCodePointCount() - overlappingCodePointCount,
            reallocatingPtNodeParams->getCodePoints() + overlappingCodePointCount,
            reallocatingPtNodeParams->getCodePointArrayView().skip(overlappingCodePointCount),
            reallocatingPtNodeParams->getProbability()));
    if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&childPartPtNodeParams, &writingPos)) {
        return false;
@@ -258,8 +251,8 @@ bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
        const PtNodeParams extraChildPtNodeParams(getPtNodeParamsForNewPtNode(
                unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(),
                true /* isTerminal */, firstPartOfReallocatedPtNodePos,
                newNodeCodePointCount - overlappingCodePointCount,
                newNodeCodePoints + overlappingCodePointCount, unigramProperty->getProbability()));
                newPtNodeCodePoints.skip(overlappingCodePointCount),
                unigramProperty->getProbability()));
        if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&extraChildPtNodeParams,
                unigramProperty, &writingPos)) {
            return false;
@@ -282,26 +275,24 @@ bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
}

const PtNodeParams DynamicPtUpdatingHelper::getUpdatedPtNodeParams(
        const PtNodeParams *const originalPtNodeParams,
        const bool isNotAWord, const bool isBlacklisted, const bool isTerminal, const int parentPos,
        const int codePointCount, const int *const codePoints, const int probability) const {
        const PtNodeParams *const originalPtNodeParams, const bool isNotAWord,
        const bool isBlacklisted, const bool isTerminal, const int parentPos,
        const CodePointArrayView codePoints, const int probability) const {
    const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags(
            isBlacklisted, isNotAWord, isTerminal, false /* hasShortcutTargets */,
            false /* hasBigrams */, codePointCount > 1 /* hasMultipleChars */,
            false /* hasBigrams */, codePoints.size() > 1u /* hasMultipleChars */,
            CHILDREN_POSITION_FIELD_SIZE);
    return PtNodeParams(originalPtNodeParams, flags, parentPos, codePointCount, codePoints,
            probability);
    return PtNodeParams(originalPtNodeParams, flags, parentPos, codePoints, probability);
}

const PtNodeParams DynamicPtUpdatingHelper::getPtNodeParamsForNewPtNode(
        const bool isNotAWord, const bool isBlacklisted, const bool isTerminal,
        const int parentPos, const int codePointCount, const int *const codePoints,
        const int probability) const {
const PtNodeParams DynamicPtUpdatingHelper::getPtNodeParamsForNewPtNode(const bool isNotAWord,
        const bool isBlacklisted, const bool isTerminal, const int parentPos,
        const CodePointArrayView codePoints, const int probability) const {
    const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags(
            isBlacklisted, isNotAWord, isTerminal, false /* hasShortcutTargets */,
            false /* hasBigrams */, codePointCount > 1 /* hasMultipleChars */,
            false /* hasBigrams */, codePoints.size() > 1u /* hasMultipleChars */,
            CHILDREN_POSITION_FIELD_SIZE);
    return PtNodeParams(flags, parentPos, codePointCount, codePoints, probability);
    return PtNodeParams(flags, parentPos, codePoints, probability);
}

} // namespace latinime
+17 −18
Original line number Diff line number Diff line
@@ -40,8 +40,8 @@ class DynamicPtUpdatingHelper {

    // Add a word to the dictionary. If the word already exists, update the probability.
    bool addUnigramWord(DynamicPtReadingHelper *const readingHelper,
            const int *const wordCodePoints, const int codePointCount,
            const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram);
            const CodePointArrayView wordCodePoints, const UnigramProperty *const unigramProperty,
            bool *const outAddedNewUnigram);

    // TODO: Remove after stopping supporting v402.
    // Add an n-gram entry.
@@ -53,8 +53,8 @@ class DynamicPtUpdatingHelper {
    bool removeNgramEntry(const PtNodePosArrayView prevWordsPtNodePos, const int wordPos);

    // Add a shortcut target.
    bool addShortcutTarget(const int wordPos, const int *const targetCodePoints,
            const int targetCodePointCount, const int shortcutProbability);
    bool addShortcutTarget(const int wordPos, const CodePointArrayView targetCodePoints,
            const int shortcutProbability);

 private:
    DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPtUpdatingHelper);
@@ -65,33 +65,32 @@ class DynamicPtUpdatingHelper {
    const PtNodeReader *const mPtNodeReader;
    PtNodeWriter *const mPtNodeWriter;

    bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints,
            const int nodeCodePointCount, const UnigramProperty *const unigramProperty,
    bool createAndInsertNodeIntoPtNodeArray(const int parentPos,
            const CodePointArrayView ptNodeCodePoints, const UnigramProperty *const unigramProperty,
            int *const forwardLinkFieldPos);

    bool setPtNodeProbability(const PtNodeParams *const originalPtNodeParams,
            const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram);

    bool createChildrenPtNodeArrayAndAChildPtNode(const PtNodeParams *const parentPtNodeParams,
            const UnigramProperty *const unigramProperty, const int *const codePoints,
            const int codePointCount);
            const UnigramProperty *const unigramProperty,
            const CodePointArrayView remainingCodePoints);

    bool createNewPtNodeArrayWithAChildPtNode(const int parentPos, const int *const nodeCodePoints,
            const int nodeCodePointCount, const UnigramProperty *const unigramProperty);
    bool createNewPtNodeArrayWithAChildPtNode(const int parentPos,
            const CodePointArrayView ptNodeCodePoints,
            const UnigramProperty *const unigramProperty);

    bool reallocatePtNodeAndAddNewPtNodes(
            const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount,
            const UnigramProperty *const unigramProperty, const int *const newNodeCodePoints,
            const int newNodeCodePointCount);
    bool reallocatePtNodeAndAddNewPtNodes(const PtNodeParams *const reallocatingPtNodeParams,
            const size_t overlappingCodePointCount, const UnigramProperty *const unigramProperty,
            const CodePointArrayView newPtNodeCodePoints);

    const PtNodeParams getUpdatedPtNodeParams(const PtNodeParams *const originalPtNodeParams,
            const bool isNotAWord, const bool isBlacklisted, const bool isTerminal,
            const int parentPos, const int codePointCount,
            const int *const codePoints, const int probability) const;
            const int parentPos, const CodePointArrayView codePoints, const int probability) const;

    const PtNodeParams getPtNodeParamsForNewPtNode(const bool isNotAWord, const bool isBlacklisted,
            const bool isTerminal, const int parentPos,
            const int codePointCount, const int *const codePoints, const int probability) const;
            const bool isTerminal, const int parentPos, const CodePointArrayView codePoints,
            const int probability) const;
};
} // namespace latinime
#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_UPDATING_HELPER_H */
Loading