Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 180e7b4c authored by Keisuke Kuroyanagi
Browse files

Use ReadOnlyByteArrayView in PatriciaTriePolicy.

Change-Id: I4ef01c0af73f27c256cc1a719343be836913dc71
parent fc7d0540
Loading
Loading
Loading
Loading
+44 −35
Original line number Original line Diff line number Diff line
@@ -37,19 +37,19 @@ void PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNo
        return;
        return;
    }
    }
    int nextPos = dicNode->getChildrenPtNodeArrayPos();
    int nextPos = dicNode->getChildrenPtNodeArrayPos();
    if (nextPos < 0 || nextPos >= mDictBufferSize) {
    if (!isValidPos(nextPos)) {
        AKLOGE("Children PtNode array position is invalid. pos: %d, dict size: %d",
        AKLOGE("Children PtNode array position is invalid. pos: %d, dict size: %zd",
                nextPos, mDictBufferSize);
                nextPos, mBuffer.size());
        mIsCorrupted = true;
        mIsCorrupted = true;
        ASSERT(false);
        ASSERT(false);
        return;
        return;
    }
    }
    const int childCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
    const int childCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
            mDictRoot, &nextPos);
            mBuffer.data(), &nextPos);
    for (int i = 0; i < childCount; i++) {
    for (int i = 0; i < childCount; i++) {
        if (nextPos < 0 || nextPos >= mDictBufferSize) {
        if (!isValidPos(nextPos)) {
            AKLOGE("Child PtNode position is invalid. pos: %d, dict size: %d, childCount: %d / %d",
            AKLOGE("Child PtNode position is invalid. pos: %d, dict size: %zd, childCount: %d / %d",
                    nextPos, mDictBufferSize, i, childCount);
                    nextPos, mBuffer.size(), i, childCount);
            mIsCorrupted = true;
            mIsCorrupted = true;
            ASSERT(false);
            ASSERT(false);
            return;
            return;
@@ -91,56 +91,57 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
        int lastCandidatePtNodePos = 0;
        int lastCandidatePtNodePos = 0;
        // Let's loop through PtNodes in this PtNode array searching for either the terminal
        // Let's loop through PtNodes in this PtNode array searching for either the terminal
        // or one of its ascendants.
        // or one of its ascendants.
        if (pos < 0 || pos >= mDictBufferSize) {
        if (!isValidPos(pos)) {
            AKLOGE("PtNode array position is invalid. pos: %d, dict size: %d",
            AKLOGE("PtNode array position is invalid. pos: %d, dict size: %zd",
                    pos, mDictBufferSize);
                    pos, mBuffer.size());
            mIsCorrupted = true;
            mIsCorrupted = true;
            ASSERT(false);
            ASSERT(false);
            *outUnigramProbability = NOT_A_PROBABILITY;
            *outUnigramProbability = NOT_A_PROBABILITY;
            return 0;
            return 0;
        }
        }
        for (int ptNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
        for (int ptNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
                mDictRoot, &pos); ptNodeCount > 0; --ptNodeCount) {
                mBuffer.data(), &pos); ptNodeCount > 0; --ptNodeCount) {
            const int startPos = pos;
            const int startPos = pos;
            if (pos < 0 || pos >= mDictBufferSize) {
            if (!isValidPos(pos)) {
                AKLOGE("PtNode position is invalid. pos: %d, dict size: %d", pos, mDictBufferSize);
                AKLOGE("PtNode position is invalid. pos: %d, dict size: %zd", pos, mBuffer.size());
                mIsCorrupted = true;
                mIsCorrupted = true;
                ASSERT(false);
                ASSERT(false);
                *outUnigramProbability = NOT_A_PROBABILITY;
                *outUnigramProbability = NOT_A_PROBABILITY;
                return 0;
                return 0;
            }
            }
            const PatriciaTrieReadingUtils::NodeFlags flags =
            const PatriciaTrieReadingUtils::NodeFlags flags =
                    PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
                    PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mBuffer.data(), &pos);
            const int character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
            const int character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
                    mDictRoot, &pos);
                    mBuffer.data(), &pos);
            if (ptNodePos == startPos) {
            if (ptNodePos == startPos) {
                // We found the position. Copy the rest of the code points in the buffer and return
                // We found the position. Copy the rest of the code points in the buffer and return
                // the length.
                // the length.
                outCodePoints[wordPos] = character;
                outCodePoints[wordPos] = character;
                if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) {
                if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) {
                    int nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
                    int nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
                            mDictRoot, &pos);
                            mBuffer.data(), &pos);
                    // We count code points in order to avoid infinite loops if the file is broken
                    // We count code points in order to avoid infinite loops if the file is broken
                    // or if there is some other bug
                    // or if there is some other bug
                    int charCount = maxCodePointCount;
                    int charCount = maxCodePointCount;
                    while (NOT_A_CODE_POINT != nextChar && --charCount > 0) {
                    while (NOT_A_CODE_POINT != nextChar && --charCount > 0) {
                        outCodePoints[++wordPos] = nextChar;
                        outCodePoints[++wordPos] = nextChar;
                        nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
                        nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
                                mDictRoot, &pos);
                                mBuffer.data(), &pos);
                    }
                    }
                }
                }
                *outUnigramProbability =
                *outUnigramProbability =
                        PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot,
                        PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mBuffer.data(),
                                &pos);
                                &pos);
                return ++wordPos;
                return ++wordPos;
            }
            }
            // We need to skip past this PtNode, so skip any remaining code points after the
            // We need to skip past this PtNode, so skip any remaining code points after the
            // first and possibly the probability.
            // first and possibly the probability.
            if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) {
            if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) {
                PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos);
                PatriciaTrieReadingUtils::skipCharacters(mBuffer.data(), flags, MAX_WORD_LENGTH,
                        &pos);
            }
            }
            if (PatriciaTrieReadingUtils::isTerminal(flags)) {
            if (PatriciaTrieReadingUtils::isTerminal(flags)) {
                PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
                PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mBuffer.data(), &pos);
            }
            }
            // The fact that this PtNode has children is very important. Since we already know
            // The fact that this PtNode has children is very important. Since we already know
            // that this PtNode does not match, if it has no children we know it is irrelevant
            // that this PtNode does not match, if it has no children we know it is irrelevant
@@ -155,7 +156,8 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
                int currentPos = pos;
                int currentPos = pos;
                // Here comes the tricky part. First, read the children position.
                // Here comes the tricky part. First, read the children position.
                const int childrenPos = PatriciaTrieReadingUtils
                const int childrenPos = PatriciaTrieReadingUtils
                        ::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &currentPos);
                        ::readChildrenPositionAndAdvancePosition(mBuffer.data(), flags,
                                &currentPos);
                if (childrenPos > ptNodePos) {
                if (childrenPos > ptNodePos) {
                    // If the children pos is greater than the position, it means the previous
                    // If the children pos is greater than the position, it means the previous
                    // PtNode, which position is stored in lastCandidatePtNodePos, was the right
                    // PtNode, which position is stored in lastCandidatePtNodePos, was the right
@@ -185,30 +187,30 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
                if (0 != lastCandidatePtNodePos) {
                if (0 != lastCandidatePtNodePos) {
                    const PatriciaTrieReadingUtils::NodeFlags lastFlags =
                    const PatriciaTrieReadingUtils::NodeFlags lastFlags =
                            PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(
                            PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(
                                    mDictRoot, &lastCandidatePtNodePos);
                                    mBuffer.data(), &lastCandidatePtNodePos);
                    const int lastChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
                    const int lastChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
                            mDictRoot, &lastCandidatePtNodePos);
                            mBuffer.data(), &lastCandidatePtNodePos);
                    // We copy all the characters in this PtNode to the buffer
                    // We copy all the characters in this PtNode to the buffer
                    outCodePoints[wordPos] = lastChar;
                    outCodePoints[wordPos] = lastChar;
                    if (PatriciaTrieReadingUtils::hasMultipleChars(lastFlags)) {
                    if (PatriciaTrieReadingUtils::hasMultipleChars(lastFlags)) {
                        int nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
                        int nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
                                mDictRoot, &lastCandidatePtNodePos);
                                mBuffer.data(), &lastCandidatePtNodePos);
                        int charCount = maxCodePointCount;
                        int charCount = maxCodePointCount;
                        while (-1 != nextChar && --charCount > 0) {
                        while (-1 != nextChar && --charCount > 0) {
                            outCodePoints[++wordPos] = nextChar;
                            outCodePoints[++wordPos] = nextChar;
                            nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
                            nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
                                    mDictRoot, &lastCandidatePtNodePos);
                                    mBuffer.data(), &lastCandidatePtNodePos);
                        }
                        }
                    }
                    }
                    ++wordPos;
                    ++wordPos;
                    // Now we only need to branch to the children address. Skip the probability if
                    // Now we only need to branch to the children address. Skip the probability if
                    // it's there, read pos, and break to resume the search at pos.
                    // it's there, read pos, and break to resume the search at pos.
                    if (PatriciaTrieReadingUtils::isTerminal(lastFlags)) {
                    if (PatriciaTrieReadingUtils::isTerminal(lastFlags)) {
                        PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot,
                        PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mBuffer.data(),
                                &lastCandidatePtNodePos);
                                &lastCandidatePtNodePos);
                    }
                    }
                    pos = PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
                    pos = PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
                            mDictRoot, lastFlags, &lastCandidatePtNodePos);
                            mBuffer.data(), lastFlags, &lastCandidatePtNodePos);
                    break;
                    break;
                } else {
                } else {
                    // Here is a little tricky part: we come here if we found out that all children
                    // Here is a little tricky part: we come here if we found out that all children
@@ -220,14 +222,14 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
                    // ready to start the next one.
                    // ready to start the next one.
                    if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
                    if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
                        PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
                        PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
                                mDictRoot, flags, &pos);
                                mBuffer.data(), flags, &pos);
                    }
                    }
                    if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
                    if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
                        mShortcutListPolicy.skipAllShortcuts(&pos);
                        mShortcutListPolicy.skipAllShortcuts(&pos);
                    }
                    }
                    if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
                    if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
                        if (!mBigramListPolicy.skipAllBigrams(&pos)) {
                        if (!mBigramListPolicy.skipAllBigrams(&pos)) {
                            AKLOGE("Cannot skip bigrams. BufSize: %d, pos: %d.", mDictBufferSize,
                            AKLOGE("Cannot skip bigrams. BufSize: %zd, pos: %d.", mBuffer.size(),
                                    pos);
                                    pos);
                            mIsCorrupted = true;
                            mIsCorrupted = true;
                            ASSERT(false);
                            ASSERT(false);
@@ -244,14 +246,14 @@ int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
                // our pos is after the end of this PtNode, at the start of the next one.
                // our pos is after the end of this PtNode, at the start of the next one.
                if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
                if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
                    PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
                    PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
                            mDictRoot, flags, &pos);
                            mBuffer.data(), flags, &pos);
                }
                }
                if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
                if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
                    mShortcutListPolicy.skipAllShortcuts(&pos);
                    mShortcutListPolicy.skipAllShortcuts(&pos);
                }
                }
                if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
                if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
                    if (!mBigramListPolicy.skipAllBigrams(&pos)) {
                    if (!mBigramListPolicy.skipAllBigrams(&pos)) {
                        AKLOGE("Cannot skip bigrams. BufSize: %d, pos: %d.", mDictBufferSize, pos);
                        AKLOGE("Cannot skip bigrams. BufSize: %zd, pos: %d.", mBuffer.size(), pos);
                        mIsCorrupted = true;
                        mIsCorrupted = true;
                        ASSERT(false);
                        ASSERT(false);
                        *outUnigramProbability = NOT_A_PROBABILITY;
                        *outUnigramProbability = NOT_A_PROBABILITY;
@@ -402,7 +404,7 @@ int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNod
    int shortcutPos = NOT_A_DICT_POS;
    int shortcutPos = NOT_A_DICT_POS;
    int bigramPos = NOT_A_DICT_POS;
    int bigramPos = NOT_A_DICT_POS;
    int siblingPos = NOT_A_DICT_POS;
    int siblingPos = NOT_A_DICT_POS;
    PatriciaTrieReadingUtils::readPtNodeInfo(mDictRoot, ptNodePos, &mShortcutListPolicy,
    PatriciaTrieReadingUtils::readPtNodeInfo(mBuffer.data(), ptNodePos, &mShortcutListPolicy,
            &mBigramListPolicy, &flags, &mergedNodeCodePointCount, mergedNodeCodePoints,
            &mBigramListPolicy, &flags, &mergedNodeCodePointCount, mergedNodeCodePoints,
            &probability, &childrenPos, &shortcutPos, &bigramPos, &siblingPos);
            &probability, &childrenPos, &shortcutPos, &bigramPos, &siblingPos);
    // Skip PtNodes don't start with Unicode code point because they represent non-word information.
    // Skip PtNodes don't start with Unicode code point because they represent non-word information.
@@ -452,14 +454,16 @@ const WordProperty PatriciaTriePolicy::getWordProperty(
    int shortcutPos = getShortcutPositionOfPtNode(ptNodePos);
    int shortcutPos = getShortcutPositionOfPtNode(ptNodePos);
    if (shortcutPos != NOT_A_DICT_POS) {
    if (shortcutPos != NOT_A_DICT_POS) {
        int shortcutTargetCodePoints[MAX_WORD_LENGTH];
        int shortcutTargetCodePoints[MAX_WORD_LENGTH];
        ShortcutListReadingUtils::getShortcutListSizeAndForwardPointer(mDictRoot, &shortcutPos);
        ShortcutListReadingUtils::getShortcutListSizeAndForwardPointer(mBuffer.data(),
                &shortcutPos);
        bool hasNext = true;
        bool hasNext = true;
        while (hasNext) {
        while (hasNext) {
            const ShortcutListReadingUtils::ShortcutFlags shortcutFlags =
            const ShortcutListReadingUtils::ShortcutFlags shortcutFlags =
                    ShortcutListReadingUtils::getFlagsAndForwardPointer(mDictRoot, &shortcutPos);
                    ShortcutListReadingUtils::getFlagsAndForwardPointer(mBuffer.data(),
                            &shortcutPos);
            hasNext = ShortcutListReadingUtils::hasNext(shortcutFlags);
            hasNext = ShortcutListReadingUtils::hasNext(shortcutFlags);
            const int shortcutTargetLength = ShortcutListReadingUtils::readShortcutTarget(
            const int shortcutTargetLength = ShortcutListReadingUtils::readShortcutTarget(
                    mDictRoot, MAX_WORD_LENGTH, shortcutTargetCodePoints, &shortcutPos);
                    mBuffer.data(), MAX_WORD_LENGTH, shortcutTargetCodePoints, &shortcutPos);
            const std::vector<int> shortcutTarget(shortcutTargetCodePoints,
            const std::vector<int> shortcutTarget(shortcutTargetCodePoints,
                    shortcutTargetCodePoints + shortcutTargetLength);
                    shortcutTargetCodePoints + shortcutTargetLength);
            const int shortcutProbability =
            const int shortcutProbability =
@@ -512,4 +516,9 @@ int PatriciaTriePolicy::getWordIdFromTerminalPtNodePos(const int ptNodePos) cons
// Converts a word id into the position of its terminal PtNode. NOT_A_WORD_ID is mapped to
// NOT_A_DICT_POS; any other word id is returned unchanged and used directly as the position.
int PatriciaTriePolicy::getTerminalPtNodePosFromWordId(const int wordId) const {
int PatriciaTriePolicy::getTerminalPtNodePosFromWordId(const int wordId) const {
    return wordId == NOT_A_WORD_ID ? NOT_A_DICT_POS : wordId;
    return wordId == NOT_A_WORD_ID ? NOT_A_DICT_POS : wordId;
}
}

// Returns true iff pos is a usable offset into mBuffer, i.e. 0 <= pos < mBuffer.size().
// The sign is checked first and the upper bound is then compared as size_t, so the buffer
// size is never narrowed to int (a size above INT_MAX would otherwise wrap and make valid
// positions look invalid).
bool PatriciaTriePolicy::isValidPos(const int pos) const {
    return pos >= 0 && static_cast<size_t>(pos) < mBuffer.size();
}

} // namespace latinime
} // namespace latinime
+8 −9
Original line number Original line Diff line number Diff line
@@ -44,13 +44,12 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
            : mMmappedBuffer(std::move(mmappedBuffer)),
            : mMmappedBuffer(std::move(mmappedBuffer)),
              mHeaderPolicy(mMmappedBuffer->getReadOnlyByteArrayView().data(),
              mHeaderPolicy(mMmappedBuffer->getReadOnlyByteArrayView().data(),
                      FormatUtils::VERSION_2),
                      FormatUtils::VERSION_2),
              mDictRoot(mMmappedBuffer->getReadOnlyByteArrayView().data()
              mBuffer(mMmappedBuffer->getReadOnlyByteArrayView().skip(mHeaderPolicy.getSize())),
                      + mHeaderPolicy.getSize()),
              mBigramListPolicy(mBuffer.data(), mBuffer.size()),
              mDictBufferSize(mMmappedBuffer->getReadOnlyByteArrayView().size()
              mShortcutListPolicy(mBuffer.data()),
                      - mHeaderPolicy.getSize()),
              mPtNodeReader(mBuffer.data(), mBuffer.size(), &mBigramListPolicy,
              mBigramListPolicy(mDictRoot, mDictBufferSize), mShortcutListPolicy(mDictRoot),
                      &mShortcutListPolicy),
              mPtNodeReader(mDictRoot, mDictBufferSize, &mBigramListPolicy, &mShortcutListPolicy),
              mPtNodeArrayReader(mBuffer.data(), mBuffer.size()),
              mPtNodeArrayReader(mDictRoot, mDictBufferSize),
              mTerminalPtNodePositionsForIteratingWords(), mIsCorrupted(false) {}
              mTerminalPtNodePositionsForIteratingWords(), mIsCorrupted(false) {}


    AK_FORCE_INLINE int getRootPosition() const {
    AK_FORCE_INLINE int getRootPosition() const {
@@ -149,8 +148,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {


    const MmappedBuffer::MmappedBufferPtr mMmappedBuffer;
    const MmappedBuffer::MmappedBufferPtr mMmappedBuffer;
    const HeaderPolicy mHeaderPolicy;
    const HeaderPolicy mHeaderPolicy;
    const uint8_t *const mDictRoot;
    const ReadOnlyByteArrayView mBuffer;
    const int mDictBufferSize;
    const BigramListPolicy mBigramListPolicy;
    const BigramListPolicy mBigramListPolicy;
    const ShortcutListPolicy mShortcutListPolicy;
    const ShortcutListPolicy mShortcutListPolicy;
    const Ver2ParticiaTrieNodeReader mPtNodeReader;
    const Ver2ParticiaTrieNodeReader mPtNodeReader;
@@ -166,6 +164,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
    int getTerminalPtNodePosFromWordId(const int wordId) const;
    int getTerminalPtNodePosFromWordId(const int wordId) const;
    const WordAttributes getWordAttributes(const int probability,
    const WordAttributes getWordAttributes(const int probability,
            const PtNodeParams &ptNodeParams) const;
            const PtNodeParams &ptNodeParams) const;
    bool isValidPos(const int pos) const;
};
};
} // namespace latinime
} // namespace latinime
#endif // LATINIME_PATRICIA_TRIE_POLICY_H
#endif // LATINIME_PATRICIA_TRIE_POLICY_H
+7 −0
Original line number Original line Diff line number Diff line
@@ -42,6 +42,13 @@ class ReadOnlyByteArrayView {
        return mPtr;
        return mPtr;
    }
    }


    // Returns a view that starts n bytes after the start of this view and covers the
    // remaining mSize - n bytes. When n reaches or exceeds mSize there is nothing left, and a
    // default-constructed view is returned instead.
    AK_FORCE_INLINE const ReadOnlyByteArrayView skip(const size_t n) const {
        if (n < mSize) {
            return ReadOnlyByteArrayView(mPtr + n, mSize - n);
        }
        return ReadOnlyByteArrayView();
    }

 private:
 private:
    DISALLOW_ASSIGNMENT_OPERATOR(ReadOnlyByteArrayView);
    DISALLOW_ASSIGNMENT_OPERATOR(ReadOnlyByteArrayView);