Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 6379a4de authored by Keisuke Kuroynagi's avatar Keisuke Kuroynagi
Browse files

Reduce the number of arguments required to initialize dic_node.

Bug: 6669677

Change-Id: I52249b83f72560d8f5ab028da5cfb5c50f6e40b8
parent a690f3bc
Loading
Loading
Loading
Loading
+21 −28
Original line number Diff line number Diff line
@@ -109,12 +109,14 @@ class DicNode {

    // TODO: minimize arguments by looking binary_format
    // Init for root with prevWordNodePos which is used for bigram
    // Initializes this node as a trie root: marks it used, clears the
    // cached-for-next-suggestion flag, initializes the node properties with root
    // defaults and passes prevWordNodePos to the node state.
    // NOTE(review): this is a diff view without +/- markers. The two-line signature
    // taking childrenCount and the 14-argument init() call look like the removed
    // lines; the line(s) following each look like their replacements -- confirm.
    void initAsRoot(const int pos, const int childrenPos, const int childrenCount,
            const int prevWordNodePos) {
    void initAsRoot(const int pos, const int childrenPos, const int prevWordNodePos) {
        mIsUsed = true;
        mIsCachedForNextSuggestion = false;
        mDicNodeProperties.init(
                pos, 0, childrenPos, 0, 0, 0, childrenCount, 0, 0, false, false, true, 0, 0);
                pos, 0 /* flags */, childrenPos, 0 /* attributesPos */,
                NOT_A_CODE_POINT /* nodeCodePoint */, NOT_A_PROBABILITY /* probability */,
                false /* isTerminal */, true /* hasChildren */, 0 /* depth */,
                0 /* terminalDepth */);
        mDicNodeState.init(prevWordNodePos);
        PROF_NODE_RESET(mProfiler);
    }
@@ -130,12 +132,14 @@ class DicNode {

    // TODO: minimize arguments by looking binary_format
    // Init for root with previous word
    void initAsRootWithPreviousWord(DicNode *dicNode, const int pos, const int childrenPos,
            const int childrenCount) {
    void initAsRootWithPreviousWord(DicNode *dicNode, const int pos, const int childrenPos) {
        mIsUsed = true;
        mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
        mDicNodeProperties.init(
                pos, 0, childrenPos, 0, 0, 0, childrenCount, 0, 0, false, false, true, 0, 0);
                pos,  0 /* flags */, childrenPos, 0 /* attributesPos */,
                NOT_A_CODE_POINT /* nodeCodePoint */, NOT_A_PROBABILITY /* probability */,
                false /* isTerminal */, true /* hasChildren */, 0 /* depth */,
                0 /* terminalDepth */);
        // TODO: Move to dicNodeState?
        mDicNodeState.mDicNodeStateOutput.init(); // reset for next word
        mDicNodeState.mDicNodeStateInput.init(
@@ -157,19 +161,18 @@ class DicNode {

    // TODO: minimize arguments by looking binary_format
    // Initializes this node as a child of dicNode.
    // The new depth is the parent's code point count plus one; the new leaving depth
    // is the parent's leaving depth plus the number of code points supplied for this
    // node. The cached-for-next-suggestion flag and the profiler are taken from the
    // parent.
    // NOTE(review): this is a diff view without +/- markers. The signature lines
    // mentioning siblingPos/childrenCount/additionalSubword* and the statements using
    // additionalSubword* look like the removed version; the mergedNodeCodePoint*
    // lines look like the replacement -- confirm.
    void initAsChild(DicNode *dicNode, const int pos, const uint8_t flags, const int childrenPos,
            const int attributesPos, const int siblingPos, const int nodeCodePoint,
            const int childrenCount, const int probability, const int bigramProbability,
            const bool isTerminal, const bool hasMultipleChars, const bool hasChildren,
            const uint16_t additionalSubwordLength, const int *additionalSubword) {
            const int attributesPos, const int probability, const bool isTerminal,
            const bool hasChildren, const uint16_t mergedNodeCodePointCount,
            const int *const mergedNodeCodePoints) {
        mIsUsed = true;
        uint16_t newDepth = static_cast<uint16_t>(dicNode->getNodeCodePointCount() + 1);
        mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
        const uint16_t newLeavingDepth = static_cast<uint16_t>(
                dicNode->mDicNodeProperties.getLeavingDepth() + additionalSubwordLength);
        mDicNodeProperties.init(pos, flags, childrenPos, attributesPos, siblingPos, nodeCodePoint,
                childrenCount, probability, bigramProbability, isTerminal, hasMultipleChars,
                hasChildren, newDepth, newLeavingDepth);
        mDicNodeState.init(&dicNode->mDicNodeState, additionalSubwordLength, additionalSubword);
                dicNode->mDicNodeProperties.getLeavingDepth() + mergedNodeCodePointCount);
        // The first merged code point is used as this node's code point.
        // NOTE(review): mergedNodeCodePoints is dereferenced here without a null or
        // count check -- presumably callers always pass at least one code point; verify.
        mDicNodeProperties.init(pos, flags, childrenPos, attributesPos, mergedNodeCodePoints[0],
                probability, isTerminal, hasChildren, newDepth, newLeavingDepth);
        mDicNodeState.init(&dicNode->mDicNodeState, mergedNodeCodePointCount,
                mergedNodeCodePoints);
        PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
    }

@@ -193,8 +196,8 @@ class DicNode {
    }

    // Returns true when this node's code point count equals its leaving depth.
    // Asserts that the code point count never exceeds the leaving depth.
    // NOTE(review): diff view without +/- markers. The first ASSERT/return pair (using
    // the DicNode::getLeavingDepth() wrapper) looks like the removed version; the second
    // pair reads the value from mDicNodeProperties directly -- confirm.
    bool isLeavingNode() const {
        ASSERT(getNodeCodePointCount() <= getLeavingDepth());
        return getNodeCodePointCount() == getLeavingDepth();
        ASSERT(getNodeCodePointCount() <= mDicNodeProperties.getLeavingDepth());
        return getNodeCodePointCount() == mDicNodeProperties.getLeavingDepth();
    }

    AK_FORCE_INLINE bool isFirstLetter() const {
@@ -256,12 +259,6 @@ class DicNode {
        return mDicNodeProperties.getChildrenPos();
    }

    // Used in DicNodeUtils
    // Forwards to DicNodeProperties::getChildrenCount().
    // NOTE(review): this accessor appears to be removed by this commit -- confirm.
    int getChildrenCount() const {
        return mDicNodeProperties.getChildrenCount();
    }

    // Used in DicNodeUtils
    // Returns the probability stored in this node's properties.
    int getProbability() const {
        return mDicNodeProperties.getProbability();
    }
@@ -280,10 +277,6 @@ class DicNode {
        return !(currentDepth > 0 && (currentDepth != 1 || prevWordLen != 1));
    }

    // Forwards to DicNodeProperties::getLeavingDepth().
    // NOTE(review): this wrapper appears to be removed by this commit -- confirm.
    uint16_t getLeavingDepth() const {
        return mDicNodeProperties.getLeavingDepth();
    }

    bool isTotalInputSizeExceedingLimit() const {
        const int prevWordsLen = mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength();
        const int currentWordDepth = getNodeCodePointCount();
@@ -370,7 +363,7 @@ class DicNode {
    }

    // Returns a read-only pointer to the output state's code point buffer.
    // NOTE(review): diff view without +/- markers. The mWordBuf return looks like the
    // removed line and the mCodePointsBuf return like its replacement -- confirm.
    AK_FORCE_INLINE const int *getOutputWordBuf() const {
        return mDicNodeState.mDicNodeStateOutput.mWordBuf;
        return mDicNodeState.mDicNodeStateOutput.mCodePointsBuf;
    }

    int getPrevCodePointG(int pointerId) const {
+9 −41
Original line number Diff line number Diff line
@@ -27,37 +27,31 @@ namespace latinime {
/**
 * Node for traversing the lexicon trie.
 */
// TODO: Introduce a dictionary node class which has attribute members required to understand the
// dictionary structure.
class DicNodeProperties {
 public:
    // Default constructor: zero-initializes every numeric member and sets every
    // boolean member to false.
    // NOTE(review): diff view without +/- markers. The first initializer list (with
    // mSiblingPos, mChildrenCount, mBigramProbability, mHasMultipleChars) looks like
    // the removed version; the shorter list that follows looks like the replacement
    // -- confirm.
    AK_FORCE_INLINE DicNodeProperties()
            : mPos(0), mFlags(0), mChildrenPos(0), mAttributesPos(0), mSiblingPos(0),
              mChildrenCount(0), mProbability(0), mBigramProbability(0), mNodeCodePoint(0),
              mDepth(0), mLeavingDepth(0), mIsTerminal(false), mHasMultipleChars(false),
              mHasChildren(false) {
    }
            : mPos(0), mFlags(0), mChildrenPos(0), mAttributesPos(0), mProbability(0),
              mNodeCodePoint(0), mDepth(0), mLeavingDepth(0), mIsTerminal(false),
              mHasChildren(false) {}

    virtual ~DicNodeProperties() {}

    // Should be called only once each time a DicNode is initialized.
    // Stores every argument into the member of the same name; the last argument is
    // stored into mLeavingDepth.
    // NOTE(review): diff view without +/- markers. The parameter lines and assignments
    // mentioning siblingPos, childrenCount, bigramProbability, hasMultipleChars and
    // terminalDepth look like the removed version; the remaining lines look like the
    // replacement -- confirm.
    void init(const int pos, const uint8_t flags, const int childrenPos, const int attributesPos,
            const int siblingPos, const int nodeCodePoint, const int childrenCount,
            const int probability, const int bigramProbability, const bool isTerminal,
            const bool hasMultipleChars, const bool hasChildren, const uint16_t depth,
            const uint16_t terminalDepth) {
            const int nodeCodePoint, const int probability, const bool isTerminal,
            const bool hasChildren, const uint16_t depth, const uint16_t leavingDepth) {
        mPos = pos;
        mFlags = flags;
        mChildrenPos = childrenPos;
        mAttributesPos = attributesPos;
        mSiblingPos = siblingPos;
        mNodeCodePoint = nodeCodePoint;
        mChildrenCount = childrenCount;
        mProbability = probability;
        mBigramProbability = bigramProbability;
        mIsTerminal = isTerminal;
        mHasMultipleChars = hasMultipleChars;
        mHasChildren = hasChildren;
        mDepth = depth;
        mLeavingDepth = terminalDepth;
        mLeavingDepth = leavingDepth;
    }

    // Init for copy
@@ -66,13 +60,9 @@ class DicNodeProperties {
        mFlags = nodeProp->mFlags;
        mChildrenPos = nodeProp->mChildrenPos;
        mAttributesPos = nodeProp->mAttributesPos;
        mSiblingPos = nodeProp->mSiblingPos;
        mNodeCodePoint = nodeProp->mNodeCodePoint;
        mChildrenCount = nodeProp->mChildrenCount;
        mProbability = nodeProp->mProbability;
        mBigramProbability = nodeProp->mBigramProbability;
        mIsTerminal = nodeProp->mIsTerminal;
        mHasMultipleChars = nodeProp->mHasMultipleChars;
        mHasChildren = nodeProp->mHasChildren;
        mDepth = nodeProp->mDepth;
        mLeavingDepth = nodeProp->mLeavingDepth;
@@ -84,13 +74,9 @@ class DicNodeProperties {
        mFlags = nodeProp->mFlags;
        mChildrenPos = nodeProp->mChildrenPos;
        mAttributesPos = nodeProp->mAttributesPos;
        mSiblingPos = nodeProp->mSiblingPos;
        mNodeCodePoint = codePoint; // Overwrite the node char of a passing child
        mChildrenCount = nodeProp->mChildrenCount;
        mProbability = nodeProp->mProbability;
        mBigramProbability = nodeProp->mBigramProbability;
        mIsTerminal = nodeProp->mIsTerminal;
        mHasMultipleChars = nodeProp->mHasMultipleChars;
        mHasChildren = nodeProp->mHasChildren;
        mDepth = nodeProp->mDepth + 1; // Increment the depth of a passing child
        mLeavingDepth = nodeProp->mLeavingDepth;
@@ -112,10 +98,6 @@ class DicNodeProperties {
        return mAttributesPos;
    }

    // Returns the stored children count.
    // NOTE(review): this accessor (and mChildrenCount) appears to be removed by this
    // commit -- confirm.
    int getChildrenCount() const {
        return mChildrenCount;
    }

    // Returns the stored probability value.
    int getProbability() const {
        return mProbability;
    }
@@ -137,12 +119,8 @@ class DicNodeProperties {
        return mIsTerminal;
    }

    // Returns the stored has-multiple-chars flag.
    // NOTE(review): this accessor (and mHasMultipleChars) appears to be removed by this
    // commit -- confirm.
    bool hasMultipleChars() const {
        return mHasMultipleChars;
    }

    // Returns true when the node has children, or when its depth differs from its
    // leaving depth.
    // NOTE(review): diff view without +/- markers. The mChildrenCount-based return looks
    // like the removed line; the mHasChildren-based return looks like its replacement
    // -- confirm.
    bool hasChildren() const {
        return mChildrenCount > 0 || mDepth != mLeavingDepth;
        return mHasChildren || mDepth != mLeavingDepth;
    }

    bool hasBlacklistedOrNotAWordFlag() const {
@@ -153,25 +131,15 @@ class DicNodeProperties {
    // Caution!!!
    // Use a default copy constructor and an assign operator because shallow copies are ok
    // for this class

    // Not used
    // Returns the stored sibling position.
    // NOTE(review): this accessor (and mSiblingPos) appears to be removed by this
    // commit -- confirm.
    int getSiblingPos() const {
        return mSiblingPos;
    }

    int mPos;
    uint8_t mFlags;
    int mChildrenPos;
    int mAttributesPos;
    int mSiblingPos;
    int mChildrenCount;
    int mProbability;
    int mBigramProbability; // not used for now
    int mNodeCodePoint;
    uint16_t mDepth;
    uint16_t mLeavingDepth;
    bool mIsTerminal;
    bool mHasMultipleChars;
    bool mHasChildren;
};
} // namespace latinime
+5 −4
Original line number Diff line number Diff line
@@ -55,11 +55,12 @@ class DicNodeState {
        mDicNodeStateScoring.init(&src->mDicNodeStateScoring);
    }

    // Init by copy and adding subword
    // Init by copy and adding merged node code points.
    // Copies all state from src via init(src), then appends the given code points to
    // the output state.
    // NOTE(review): diff view without +/- markers. The additionalSubword* signature and
    // the addSubword() call look like the removed version; the mergedNodeCodePoint*
    // lines look like the replacement -- confirm.
    void init(const DicNodeState *const src, const uint16_t additionalSubwordLength,
            const int *const additionalSubword) {
    void init(const DicNodeState *const src, const uint16_t mergedNodeCodePointCount,
            const int *const mergedNodeCodePoints) {
        init(src);
        mDicNodeStateOutput.addSubword(additionalSubwordLength, additionalSubword);
        mDicNodeStateOutput.addMergedNodeCodePoints(
                mergedNodeCodePointCount, mergedNodeCodePoints);
    }

 private:
+21 −19
Original line number Diff line number Diff line
@@ -26,50 +26,52 @@ namespace latinime {

// Holds the code points output so far for a node: a fixed-size int buffer of
// MAX_WORD_LENGTH entries plus a count of how many entries are in use.
// NOTE(review): this is a diff view without +/- markers. Throughout the class, lines
// using mOutputtedLength / mWordBuf / addSubword look like the removed version and the
// lines using mOutputtedCodePointCount / mCodePointsBuf / addMergedNodeCodePoints look
// like the replacement -- confirm.
class DicNodeStateOutput {
 public:
    // Sets the count to 0 and then calls init() to reset the buffer.
    DicNodeStateOutput() : mOutputtedLength(0) {
    DicNodeStateOutput() : mOutputtedCodePointCount(0) {
        init();
    }

    virtual ~DicNodeStateOutput() {}

    // Resets to empty: the count becomes 0 and the first buffer slot is set to 0.
    void init() {
        mOutputtedLength = 0;
        mWordBuf[0] = 0;
        mOutputtedCodePointCount = 0;
        mCodePointsBuf[0] = 0;
    }

    // Copies the in-use code points and the count from stateOutput. A 0 is written
    // after the last copied entry when the count is below MAX_WORD_LENGTH.
    void init(const DicNodeStateOutput *const stateOutput) {
        memcpy(mWordBuf, stateOutput->mWordBuf,
                stateOutput->mOutputtedLength * sizeof(mWordBuf[0]));
        mOutputtedLength = stateOutput->mOutputtedLength;
        if (mOutputtedLength < MAX_WORD_LENGTH) {
            mWordBuf[mOutputtedLength] = 0;
        memcpy(mCodePointsBuf, stateOutput->mCodePointsBuf,
                stateOutput->mOutputtedCodePointCount * sizeof(mCodePointsBuf[0]));
        mOutputtedCodePointCount = stateOutput->mOutputtedCodePointCount;
        if (mOutputtedCodePointCount < MAX_WORD_LENGTH) {
            mCodePointsBuf[mOutputtedCodePointCount] = 0;
        }
    }

    // Appends the given code points after the current contents and advances the
    // count; does nothing when the pointer is null. A 0 is written after the last
    // entry when the new count is below MAX_WORD_LENGTH.
    // NOTE(review): no check is visible here that the current count plus the appended
    // count stays within MAX_WORD_LENGTH before the memcpy -- presumably callers
    // guarantee it; verify.
    void addSubword(const uint16_t additionalSubwordLength, const int *const additionalSubword) {
        if (additionalSubword) {
            memcpy(&mWordBuf[mOutputtedLength], additionalSubword,
                    additionalSubwordLength * sizeof(mWordBuf[0]));
            mOutputtedLength = static_cast<uint16_t>(mOutputtedLength + additionalSubwordLength);
            if (mOutputtedLength < MAX_WORD_LENGTH) {
                mWordBuf[mOutputtedLength] = 0;
    void addMergedNodeCodePoints(const uint16_t mergedNodeCodePointCount,
            const int *const mergedNodeCodePoints) {
        if (mergedNodeCodePoints) {
            memcpy(&mCodePointsBuf[mOutputtedCodePointCount], mergedNodeCodePoints,
                    mergedNodeCodePointCount * sizeof(mCodePointsBuf[0]));
            mOutputtedCodePointCount = static_cast<uint16_t>(
                    mOutputtedCodePointCount + mergedNodeCodePointCount);
            if (mOutputtedCodePointCount < MAX_WORD_LENGTH) {
                mCodePointsBuf[mOutputtedCodePointCount] = 0;
            }
        }
    }

    // TODO: Remove
    // Returns the buffer entry at the given index; the index is not range-checked here.
    int getCodePointAt(const int id) const {
        return mWordBuf[id];
    int getCodePointAt(const int index) const {
        return mCodePointsBuf[index];
    }

    // TODO: Move to private
    int mWordBuf[MAX_WORD_LENGTH];
    int mCodePointsBuf[MAX_WORD_LENGTH];

 private:
    // Caution!!!
    // Use a default copy constructor and an assign operator because shallow copies are ok
    // for this class
    // Number of buffer entries currently in use.
    uint16_t mOutputtedLength;
    uint16_t mOutputtedCodePointCount;
};
} // namespace latinime
#endif // LATINIME_DIC_NODE_STATE_OUTPUT_H
+21 −28
Original line number Diff line number Diff line
@@ -36,23 +36,17 @@ namespace latinime {

// Initializes newRootNode as a root node positioned at the dictionary's root position,
// forwarding prevWordNodePos to DicNode::initAsRoot().
// NOTE(review): diff view without +/- markers. The curPos/pos/childrenCount lines and
// the 4-argument initAsRoot() call look like the removed version, which read the group
// count and used the position after it as childrenPos. The rootPos lines look like
// the replacement, which passes the root position itself as childrenPos -- confirm.
/* static */ void DicNodeUtils::initAsRoot(const BinaryDictionaryInfo *const binaryDictionaryInfo,
        const int prevWordNodePos, DicNode *const newRootNode) {
    int curPos = binaryDictionaryInfo->getRootPosition();
    const int pos = curPos;
    const int childrenCount = BinaryFormat::getGroupCountAndForwardPointer(
            binaryDictionaryInfo->getDictRoot(), &curPos);
    const int childrenPos = curPos;
    newRootNode->initAsRoot(pos, childrenPos, childrenCount, prevWordNodePos);
    const int rootPos = binaryDictionaryInfo->getRootPosition();
    const int childrenPos = rootPos;
    newRootNode->initAsRoot(rootPos, childrenPos, prevWordNodePos);
}

// Initializes newRootNode as a root node positioned at the dictionary's root position,
// passing prevWordLastNode to DicNode::initAsRootWithPreviousWord().
// NOTE(review): diff view without +/- markers. The curPos/pos/childrenCount lines and
// the 4-argument call look like the removed version; the rootPos lines and the
// 3-argument call look like the replacement, which passes the root position itself
// as childrenPos -- confirm.
/*static */ void DicNodeUtils::initAsRootWithPreviousWord(
        const BinaryDictionaryInfo *const binaryDictionaryInfo,
        DicNode *const prevWordLastNode, DicNode *const newRootNode) {
    int curPos = binaryDictionaryInfo->getRootPosition();
    const int pos = curPos;
    const int childrenCount = BinaryFormat::getGroupCountAndForwardPointer(
            binaryDictionaryInfo->getDictRoot(), &curPos);
    const int childrenPos = curPos;
    newRootNode->initAsRootWithPreviousWord(prevWordLastNode, pos, childrenPos, childrenCount);
    const int rootPos = binaryDictionaryInfo->getRootPosition();
    const int childrenPos = rootPos;
    newRootNode->initAsRootWithPreviousWord(prevWordLastNode, rootPos, childrenPos);
}

/* static */ void DicNodeUtils::initByCopy(DicNode *srcNode, DicNode *destNode) {
@@ -76,7 +70,7 @@ namespace latinime {
}

/* static */ int DicNodeUtils::createAndGetLeavingChildNode(DicNode *dicNode, int pos,
        const BinaryDictionaryInfo *const binaryDictionaryInfo, const int terminalDepth,
        const BinaryDictionaryInfo *const binaryDictionaryInfo,
        const ProximityInfoState *pInfoState, const int pointIndex, const bool exactOnly,
        const std::vector<int> *const codePointsFilter, const ProximityInfo *const pInfo,
        DicNodeVector *childDicNodes) {
@@ -90,11 +84,10 @@ namespace latinime {
    int codePoint = BinaryFormat::getCodePointAndForwardPointer(
            binaryDictionaryInfo->getDictRoot(), &pos);
    ASSERT(NOT_A_CODE_POINT != codePoint);
    const int nodeCodePoint = codePoint;
    // TODO: optimize this
    int additionalWordBuf[MAX_WORD_LENGTH];
    uint16_t additionalSubwordLength = 0;
    additionalWordBuf[additionalSubwordLength++] = codePoint;
    int mergedNodeCodePoints[MAX_WORD_LENGTH];
    uint16_t mergedNodeCodePointCount = 0;
    mergedNodeCodePoints[mergedNodeCodePointCount++] = codePoint;

    do {
        const int nextCodePoint = hasMultipleChars
@@ -102,7 +95,7 @@ namespace latinime {
                        binaryDictionaryInfo->getDictRoot(), &pos) : NOT_A_CODE_POINT;
        const bool isLastChar = (NOT_A_CODE_POINT == nextCodePoint);
        if (!isLastChar) {
            additionalWordBuf[additionalSubwordLength++] = nextCodePoint;
            mergedNodeCodePoints[mergedNodeCodePointCount++] = nextCodePoint;
        }
        codePoint = nextCodePoint;
    } while (NOT_A_CODE_POINT != codePoint);
@@ -116,17 +109,14 @@ namespace latinime {
    const int siblingPos = BinaryFormat::skipChildrenPosAndAttributes(
            binaryDictionaryInfo->getDictRoot(), flags, pos);

    if (isDicNodeFilteredOut(nodeCodePoint, pInfo, codePointsFilter)) {
    if (isDicNodeFilteredOut(mergedNodeCodePoints[0], pInfo, codePointsFilter)) {
        return siblingPos;
    }
    if (!isMatchedNodeCodePoint(pInfoState, pointIndex, exactOnly, nodeCodePoint)) {
    if (!isMatchedNodeCodePoint(pInfoState, pointIndex, exactOnly, mergedNodeCodePoints[0])) {
        return siblingPos;
    }
    const int childrenCount = hasChildren ? BinaryFormat::getGroupCountAndForwardPointer(
            binaryDictionaryInfo->getDictRoot(), &childrenPos) : 0;
    childDicNodes->pushLeavingChild(dicNode, nextPos, flags, childrenPos, attributesPos, siblingPos,
            nodeCodePoint, childrenCount, probability, -1 /* bigramProbability */, isTerminal,
            hasMultipleChars, hasChildren, additionalSubwordLength, additionalWordBuf);
    childDicNodes->pushLeavingChild(dicNode, nextPos, flags, childrenPos, attributesPos,
            probability, isTerminal, hasChildren, mergedNodeCodePointCount, mergedNodeCodePoints);
    return siblingPos;
}

@@ -163,13 +153,16 @@ namespace latinime {
        const ProximityInfoState *pInfoState, const int pointIndex, const bool exactOnly,
        const std::vector<int> *const codePointsFilter, const ProximityInfo *const pInfo,
        DicNodeVector *childDicNodes) {
    const int terminalDepth = dicNode->getLeavingDepth();
    const int childCount = dicNode->getChildrenCount();
    if (!dicNode->hasChildren()) {
        return;
    }
    int nextPos = dicNode->getChildrenPos();
    const int childCount = BinaryFormat::getGroupCountAndForwardPointer(
            binaryDictionaryInfo->getDictRoot(), &nextPos);
    for (int i = 0; i < childCount; i++) {
        const int filterSize = codePointsFilter ? codePointsFilter->size() : 0;
        nextPos = createAndGetLeavingChildNode(dicNode, nextPos, binaryDictionaryInfo,
                terminalDepth, pInfoState, pointIndex, exactOnly, codePointsFilter, pInfo,
                pInfoState, pointIndex, exactOnly, codePointsFilter, pInfo,
                childDicNodes);
        if (!pInfo && filterSize > 0 && childDicNodes->exceeds(filterSize)) {
            // All code points have been found.
Loading