Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit eddbb7ac authored by Keisuke Kuroyanagi's avatar Keisuke Kuroyanagi
Browse files

Merge DicNodeStatePrevWord into DicNodeStateOutput.

Before:
(0)  2232.70 (0.86%)
(1)  255258.50 (98.89%)
(2)  585.73 (0.23%)
(66)  0.26 (0.00%)
Total 258126.46 (sum of others 258077.18)

After:
(0)  2249.23 (0.93%)
(1)  239883.63 (98.83%)
(2)  554.82 (0.23%)
(66)  0.35 (0.00%)
Total 242734.38 (sum of others 242688.04)

Change-Id: I9760cae5b98b3d1f4804b6b60317887eaa3ff71c
parent adfb2627
Loading
Loading
Loading
Loading
+50 −54
Original line number Diff line number Diff line
@@ -20,28 +20,33 @@
#include "defines.h"
#include "suggest/core/dicnode/dic_node_profiler.h"
#include "suggest/core/dicnode/dic_node_release_listener.h"
#include "suggest/core/dicnode/dic_node_utils.h"
#include "suggest/core/dicnode/internal/dic_node_state.h"
#include "suggest/core/dicnode/internal/dic_node_properties.h"
#include "suggest/core/dictionary/digraph_utils.h"
#include "suggest/core/dictionary/error_type_utils.h"
#include "suggest/core/layout/proximity_info_state.h"
#include "utils/char_utils.h"

#if DEBUG_DICT
// Debug logging helper (only defined when DEBUG_DICT is set): converts the node's output word
// buffer to chars with INTS_TO_CHARS and logs it through AKLOGI together with the node code
// point, the input size, the input indices and the normalized compound distance.
// Uses `inputSize`, which must be in scope wherever the macro is expanded.
#define LOGI_SHOW_ADD_COST_PROP \
        do { char charBuf[50]; \
        do { \
            char charBuf[50]; \
            INTS_TO_CHARS(getOutputWordBuf(), getNodeCodePointCount(), charBuf, NELEMS(charBuf)); \
            AKLOGI("%20s, \"%c\", size = %03d, total = %03d, index(0) = %02d, dist = %.4f, %s,,", \
                    __FUNCTION__, getNodeCodePoint(), inputSize, getTotalInputIndex(), \
                getInputIndex(0), getNormalizedCompoundDistance(), charBuf); } while (0)
                    getInputIndex(0), getNormalizedCompoundDistance(), charBuf); \
        } while (0)
// Debug logging helper (only defined when DEBUG_DICT is set): logs `header` followed by the
// spatial and language distances for scoring, the normalized compound distance, the raw length,
// the output word converted with INTS_TO_CHARS, the first input index and the normalized
// compound distance after the first word.
#define DUMP_WORD_AND_SCORE(header) \
        do { char charBuf[50]; char prevWordCharBuf[50]; \
        INTS_TO_CHARS(getOutputWordBuf(), getNodeCodePointCount(), charBuf, NELEMS(charBuf)); \
        INTS_TO_CHARS(mDicNodeState.mDicNodeStatePrevWord.getPrevWordBuf(), \
                mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength(), prevWordCharBuf, \
                NELEMS(prevWordCharBuf)); \
        AKLOGI("#%8s, %5f, %5f, %5f, %5f, %s, %s, %d, %5f,", header, \
        do { \
            char charBuf[50]; \
            INTS_TO_CHARS(getOutputWordBuf(), \
                    getNodeCodePointCount() \
                            + mDicNodeState.mDicNodeStateOutput.getPrevWordsLength(), \
                    charBuf, NELEMS(charBuf)); \
            AKLOGI("#%8s, %5f, %5f, %5f, %5f, %s, %d, %5f,", header, \
                    getSpatialDistanceForScoring(), getLanguageDistanceForScoring(), \
                getNormalizedCompoundDistance(), getRawLength(), prevWordCharBuf, charBuf, \
                    getNormalizedCompoundDistance(), getRawLength(), charBuf, \
                    getInputIndex(0), getNormalizedCompoundDistanceAfterFirstWord()); \
        } while (0)
#else
@@ -103,8 +108,8 @@ class DicNode {
    // Initializes this node as a copy of dicNode: marks it as used and copies the
    // cached-for-next-suggestion flag, the properties and the state, then copies the profiler
    // via PROF_NODE_COPY.
    void initByCopy(const DicNode *const dicNode) {
        mIsUsed = true;
        mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
        mDicNodeProperties.init(&dicNode->mDicNodeProperties);
        mDicNodeState.init(&dicNode->mDicNodeState);
        mDicNodeProperties.initByCopy(&dicNode->mDicNodeProperties);
        mDicNodeState.initByCopy(&dicNode->mDicNodeState);
        PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
    }

@@ -112,12 +117,8 @@ class DicNode {
    // Initializes this node as a root: marks it as used, clears the cached-for-next-suggestion
    // flag, sets up the properties from rootPtNodeArrayPos and prevWordPtNodePos, initializes
    // the state and resets the profiler via PROF_NODE_RESET.
    void initAsRoot(const int rootPtNodeArrayPos, const int prevWordPtNodePos) {
        mIsUsed = true;
        mIsCachedForNextSuggestion = false;
        mDicNodeProperties.init(
                NOT_A_DICT_POS /* pos */, rootPtNodeArrayPos, NOT_A_CODE_POINT /* nodeCodePoint */,
                NOT_A_PROBABILITY /* probability */, false /* isTerminal */,
                true /* hasChildren */, false /* isBlacklistedOrNotAWord */, 0 /* depth */,
                0 /* terminalDepth */);
        mDicNodeState.init(prevWordPtNodePos);
        mDicNodeProperties.init(rootPtNodeArrayPos, prevWordPtNodePos);
        mDicNodeState.init();
        PROF_NODE_RESET(mProfiler);
    }

@@ -125,13 +126,8 @@ class DicNode {
    // Initializes this node as the root of a following word, with dicNode acting as the previous
    // word: dicNode's PtNode position is passed on as the previous word's position and its depth
    // as the previous word's code point count. The cached-for-next-suggestion flag and the
    // profiler are taken over from dicNode.
    void initAsRootWithPreviousWord(const DicNode *const dicNode, const int rootPtNodeArrayPos) {
        mIsUsed = true;
        mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
        mDicNodeProperties.init(
                NOT_A_DICT_POS /* pos */, rootPtNodeArrayPos, NOT_A_CODE_POINT /* nodeCodePoint */,
                NOT_A_PROBABILITY /* probability */, false /* isTerminal */,
                true /* hasChildren */, false /* isBlacklistedOrNotAWord */,  0 /* depth */,
                0 /* terminalDepth */);
        mDicNodeProperties.init(rootPtNodeArrayPos, dicNode->mDicNodeProperties.getPtNodePos());
        mDicNodeState.initAsRootWithPreviousWord(&dicNode->mDicNodeState,
                dicNode->mDicNodeProperties.getPtNodePos(),
                dicNode->mDicNodeProperties.getDepth());
        PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
    }
@@ -141,7 +137,7 @@ class DicNode {
        mIsCachedForNextSuggestion = parentDicNode->mIsCachedForNextSuggestion;
        const int parentCodePoint = parentDicNode->getNodeTypedCodePoint();
        mDicNodeProperties.init(&parentDicNode->mDicNodeProperties, parentCodePoint);
        mDicNodeState.init(&parentDicNode->mDicNodeState);
        mDicNodeState.initByCopy(&parentDicNode->mDicNodeState);
        PROF_NODE_COPY(&parentDicNode->mProfiler, mProfiler);
    }

@@ -156,7 +152,7 @@ class DicNode {
                dicNode->mDicNodeProperties.getLeavingDepth() + mergedNodeCodePointCount);
        mDicNodeProperties.init(ptNodePos, childrenPtNodeArrayPos, mergedNodeCodePoints[0],
                probability, isTerminal, hasChildren, isBlacklistedOrNotAWord, newDepth,
                newLeavingDepth);
                newLeavingDepth, dicNode->mDicNodeProperties.getPrevWordTerminalPtNodePos());
        mDicNodeState.init(&dicNode->mDicNodeState, mergedNodeCodePointCount,
                mergedNodeCodePoints);
        PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
@@ -200,7 +196,7 @@ class DicNode {

    // Used to expand the node in DicNodeUtils
    // Returns the code point stored in the output state at index getNodeCodePointCount().
    int getNodeTypedCodePoint() const {
        return mDicNodeState.mDicNodeStateOutput.getCodePointAt(getNodeCodePointCount());
        return mDicNodeState.mDicNodeStateOutput.getCurrentWordCodePointAt(getNodeCodePointCount());
    }

    // Check if the current word and the previous word can be considered as a valid multiple word
@@ -211,19 +207,19 @@ class DicNode {
        }
        // Treat suggestion as invalid if the current and the previous word are single character
        // words.
        const int prevWordLen = mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength()
                - mDicNodeState.mDicNodeStatePrevWord.getPrevWordStart() - 1;
        const int prevWordLen = mDicNodeState.mDicNodeStateOutput.getPrevWordsLength()
                - mDicNodeState.mDicNodeStateOutput.getPrevWordStart() - 1;
        const int currentWordLen = getNodeCodePointCount();
        return (prevWordLen != 1 || currentWordLen != 1);
    }

    // Returns whether the code point at index 0 of the output state is an ASCII uppercase
    // letter, as reported by CharUtils::isAsciiUpper.
    bool isFirstCharUppercase() const {
        const int c = mDicNodeState.mDicNodeStateOutput.getCodePointAt(0);
        const int c = mDicNodeState.mDicNodeStateOutput.getCurrentWordCodePointAt(0);
        return CharUtils::isAsciiUpper(c);
    }

    // Returns true when no previous-word PtNode position is recorded for this node, i.e. the
    // stored position equals NOT_A_DICT_POS.
    bool isFirstWord() const {
        return mDicNodeState.mDicNodeStatePrevWord.getPrevWordPtNodePos() == NOT_A_DICT_POS;
        return mDicNodeProperties.getPrevWordTerminalPtNodePos() == NOT_A_DICT_POS;
    }

    bool isCompletion(const int inputSize) const {
@@ -241,7 +237,7 @@ class DicNode {

    // Used to get bigram probability in DicNodeUtils
    // Returns the stored PtNode position of the previous word.
    int getPrevWordTerminalPtNodePos() const {
        return mDicNodeState.mDicNodeStatePrevWord.getPrevWordPtNodePos();
        return mDicNodeProperties.getPrevWordTerminalPtNodePos();
    }

    // Used in DicNodeUtils
@@ -263,8 +259,8 @@ class DicNode {

    // Returns true when the current word is empty (depth 0), or when both the current word and
    // the previous word are exactly one code point long; returns false otherwise.
    bool shouldBeFilteredBySafetyNetForBigram() const {
        const uint16_t currentDepth = getNodeCodePointCount();
        // Length of the previous word: previous-words length minus the previous word's start
        // offset, minus one more.
        // NOTE(review): the extra 1 presumably accounts for a separating space - confirm.
        const int prevWordLen = mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength()
                - mDicNodeState.mDicNodeStatePrevWord.getPrevWordStart() - 1;
        const int prevWordLen = mDicNodeState.mDicNodeStateOutput.getPrevWordsLength()
                - mDicNodeState.mDicNodeStateOutput.getPrevWordStart() - 1;
        return !(currentDepth > 0 && (currentDepth != 1 || prevWordLen != 1));
    }

@@ -277,7 +273,7 @@ class DicNode {
    }

    bool isTotalInputSizeExceedingLimit() const {
        const int prevWordsLen = mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength();
        const int prevWordsLen = mDicNodeState.mDicNodeStateOutput.getPrevWordsLength();
        const int currentWordDepth = getNodeCodePointCount();
        // TODO: 3 can be 2? Needs to be investigated.
        // TODO: Have a const variable for 3 (or 2)
@@ -285,25 +281,24 @@ class DicNode {
    }

    // Writes the code points of the whole suggestion (previous words followed by the current
    // word) into dest, then dumps the word and its scores through DUMP_WORD_AND_SCORE.
    // NOTE(review): dest is assumed to have room for prevWordLength + currentDepth code points -
    // confirm at the call sites.
    void outputResult(int *dest) const {
        const uint16_t prevWordLength = mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength();
        const uint16_t prevWordLength = mDicNodeState.mDicNodeStateOutput.getPrevWordsLength();
        const uint16_t currentDepth = getNodeCodePointCount();
        DicNodeUtils::appendTwoWords(mDicNodeState.mDicNodeStatePrevWord.getPrevWordBuf(),
                   prevWordLength, getOutputWordBuf(), currentDepth, dest);
        memmove(dest, getOutputWordBuf(), (prevWordLength + currentDepth) * sizeof(dest[0]));
        DUMP_WORD_AND_SCORE("OUTPUT");
    }

    // "Total" in this context (and other methods in this class) means the whole suggestion. When
    // this represents a multi-word suggestion, the referenced PtNode (in mDicNodeState) is only
    // the one that corresponds to the last word of the suggestion, and all the previous words
    // are concatenated together in mPrevWord - which contains a space at the end.
    // are concatenated together in mDicNodeStateOutput.
    int getTotalNodeSpaceCount() const {
        // A first word has no previous words, so the count is 0.
        if (isFirstWord()) return 0;
        // Spaces are counted over the previous-words portion of the buffer only.
        return CharUtils::getSpaceCount(mDicNodeState.mDicNodeStatePrevWord.getPrevWordBuf(),
                mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength());
        return CharUtils::getSpaceCount(mDicNodeState.mDicNodeStateOutput.getCodePointBuf(),
                mDicNodeState.mDicNodeStateOutput.getPrevWordsLength());
    }

    int getSecondWordFirstInputIndex(const ProximityInfoState *const pInfoState) const {
        const int inputIndex = mDicNodeState.mDicNodeStatePrevWord.getSecondWordFirstInputIndex();
        const int inputIndex = mDicNodeState.mDicNodeStateOutput.getSecondWordFirstInputIndex();
        if (inputIndex == NOT_AN_INDEX) {
            return NOT_AN_INDEX;
        } else {
@@ -312,7 +307,7 @@ class DicNode {
    }

    // Returns true when at least one previous word is recorded (previous word count > 0).
    bool hasMultipleWords() const {
        return mDicNodeState.mDicNodeStatePrevWord.getPrevWordCount() > 0;
        return mDicNodeState.mDicNodeStateOutput.getPrevWordCount() > 0;
    }

    int getProximityCorrectionCount() const {
@@ -346,7 +341,7 @@ class DicNode {

    // Used to commit input partially
    // Returns the stored PtNode position of the previous word.
    int getPrevWordPtNodePos() const {
        return mDicNodeState.mDicNodeStatePrevWord.getPrevWordPtNodePos();
        return mDicNodeProperties.getPrevWordTerminalPtNodePos();
    }

    AK_FORCE_INLINE const int *getOutputWordBuf() const {
@@ -425,7 +420,7 @@ class DicNode {
    // Returns the language distance for scoring divided by the total number of words in the
    // suggestion (previous word count plus one for the current word).
    float getLanguageDistanceRatePerWordForScoring() const {
        const float langDist = getLanguageDistanceForScoring();
        const float totalWordCount =
                static_cast<float>(mDicNodeState.mDicNodeStatePrevWord.getPrevWordCount() + 1);
                static_cast<float>(mDicNodeState.mDicNodeStateOutput.getPrevWordCount() + 1);
        return langDist / totalWordCount;
    }

@@ -469,7 +464,7 @@ class DicNode {

    // Returns code point count including spaces
    // Computed as the current word's code point count plus the length of the previous words.
    inline uint16_t getTotalNodeCodePointCount() const {
        return getNodeCodePointCount() + mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength();
        return getNodeCodePointCount() + mDicNodeState.mDicNodeStateOutput.getPrevWordsLength();
    }

    AK_FORCE_INLINE void dump(const char *tag) const {
@@ -516,8 +511,9 @@ class DicNode {
            return depthDiff > 0;
        }
        for (int i = 0; i < depth; ++i) {
            const int codePoint = mDicNodeState.mDicNodeStateOutput.getCodePointAt(i);
            const int rightCodePoint = right->mDicNodeState.mDicNodeStateOutput.getCodePointAt(i);
            const int codePoint = mDicNodeState.mDicNodeStateOutput.getCurrentWordCodePointAt(i);
            const int rightCodePoint =
                    right->mDicNodeState.mDicNodeStateOutput.getCurrentWordCodePointAt(i);
            if (codePoint != rightCodePoint) {
                return rightCodePoint > codePoint;
            }
@@ -574,8 +570,8 @@ class DicNode {
    }

    AK_FORCE_INLINE void updateInputIndexG(const DicNode_InputStateG *const inputStateG) {
        if (mDicNodeState.mDicNodeStatePrevWord.getPrevWordCount() == 1 && isFirstLetter()) {
            mDicNodeState.mDicNodeStatePrevWord.setSecondWordFirstInputIndex(
        if (mDicNodeState.mDicNodeStateOutput.getPrevWordCount() == 1 && isFirstLetter()) {
            mDicNodeState.mDicNodeStateOutput.setSecondWordFirstInputIndex(
                    inputStateG->mInputIndex);
        }
        mDicNodeState.mDicNodeStateInput.updateInputIndexG(inputStateG->mPointerId,
+29 −6
Original line number Diff line number Diff line
@@ -29,16 +29,18 @@ namespace latinime {
class DicNodeProperties {
 public:
    // Default constructor: gives every member an explicit initial value in the initializer list
    // (positions, probability and code point, boolean flags false, depths 0).
    AK_FORCE_INLINE DicNodeProperties()
            : mPtNodePos(0), mChildrenPtNodeArrayPos(0), mProbability(0), mDicNodeCodePoint(0),
              mIsTerminal(false), mHasChildrenPtNodes(false), mIsBlacklistedOrNotAWord(false),
              mDepth(0), mLeavingDepth(0) {}
            : mPtNodePos(NOT_A_DICT_POS), mChildrenPtNodeArrayPos(NOT_A_DICT_POS),
              mProbability(NOT_A_PROBABILITY), mDicNodeCodePoint(NOT_A_CODE_POINT),
              mIsTerminal(false), mHasChildrenPtNodes(false),
              mIsBlacklistedOrNotAWord(false), mDepth(0), mLeavingDepth(0),
              mPrevWordTerminalPtNodePos(NOT_A_DICT_POS) {}

    ~DicNodeProperties() {}

    // Should be called only once per DicNode is initialized.
    void init(const int pos, const int childrenPos, const int nodeCodePoint, const int probability,
            const bool isTerminal, const bool hasChildren, const bool isBlacklistedOrNotAWord,
            const uint16_t depth, const uint16_t leavingDepth) {
            const uint16_t depth, const uint16_t leavingDepth, const int prevWordNodePos) {
        mPtNodePos = pos;
        mChildrenPtNodeArrayPos = childrenPos;
        mDicNodeCodePoint = nodeCodePoint;
@@ -48,10 +50,24 @@ class DicNodeProperties {
        mIsBlacklistedOrNotAWord = isBlacklistedOrNotAWord;
        mDepth = depth;
        mLeavingDepth = leavingDepth;
        mPrevWordTerminalPtNodePos = prevWordNodePos;
    }

    // Init for copy
    void init(const DicNodeProperties *const dicNodeProp) {
    // Init for root with prevWordPtNodePos which is used for bigram
    void init(const int rootPtNodeArrayPos, const int prevWordNodePos) {
        // The root has no PtNode position, code point or probability of its own.
        mPtNodePos = NOT_A_DICT_POS;
        // Its children are found in the PtNode array at rootPtNodeArrayPos.
        mChildrenPtNodeArrayPos = rootPtNodeArrayPos;
        mDicNodeCodePoint = NOT_A_CODE_POINT;
        mProbability = NOT_A_PROBABILITY;
        mIsTerminal = false;
        mHasChildrenPtNodes = true;
        mIsBlacklistedOrNotAWord = false;
        // Depth counters start at zero for a root.
        mDepth = 0;
        mLeavingDepth = 0;
        mPrevWordTerminalPtNodePos = prevWordNodePos;
    }

    void initByCopy(const DicNodeProperties *const dicNodeProp) {
        mPtNodePos = dicNodeProp->mPtNodePos;
        mChildrenPtNodeArrayPos = dicNodeProp->mChildrenPtNodeArrayPos;
        mDicNodeCodePoint = dicNodeProp->mDicNodeCodePoint;
@@ -61,6 +77,7 @@ class DicNodeProperties {
        mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord;
        mDepth = dicNodeProp->mDepth;
        mLeavingDepth = dicNodeProp->mLeavingDepth;
        mPrevWordTerminalPtNodePos = dicNodeProp->mPrevWordTerminalPtNodePos;
    }

    // Init as passing child
@@ -74,6 +91,7 @@ class DicNodeProperties {
        mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord;
        mDepth = dicNodeProp->mDepth + 1; // Increment the depth of a passing child
        mLeavingDepth = dicNodeProp->mLeavingDepth;
        mPrevWordTerminalPtNodePos = dicNodeProp->mPrevWordTerminalPtNodePos;
    }

    int getPtNodePos() const {
@@ -113,6 +131,10 @@ class DicNodeProperties {
        return mIsBlacklistedOrNotAWord;
    }

    // Returns the previous word's terminal PtNode position stored in this object.
    int getPrevWordTerminalPtNodePos() const {
        return mPrevWordTerminalPtNodePos;
    }

 private:
    // Caution!!!
    // Use a default copy constructor and an assign operator because shallow copies are ok
@@ -126,6 +148,7 @@ class DicNodeProperties {
    bool mIsBlacklistedOrNotAWord;
    uint16_t mDepth;
    uint16_t mLeavingDepth;
    int mPrevWordTerminalPtNodePos;
};
} // namespace latinime
#endif // LATINIME_DIC_NODE_PROPERTIES_H
+14 −30
Original line number Diff line number Diff line
@@ -20,7 +20,6 @@
#include "defines.h"
#include "suggest/core/dicnode/internal/dic_node_state_input.h"
#include "suggest/core/dicnode/internal/dic_node_state_output.h"
#include "suggest/core/dicnode/internal/dic_node_state_prevword.h"
#include "suggest/core/dicnode/internal/dic_node_state_scoring.h"

namespace latinime {
@@ -29,65 +28,50 @@ class DicNodeState {
 public:
    DicNodeStateInput mDicNodeStateInput;
    DicNodeStateOutput mDicNodeStateOutput;
    DicNodeStatePrevWord mDicNodeStatePrevWord;
    DicNodeStateScoring mDicNodeStateScoring;

    // Default constructor: default-constructs each sub-state member.
    AK_FORCE_INLINE DicNodeState()
            : mDicNodeStateInput(), mDicNodeStateOutput(), mDicNodeStatePrevWord(),
              mDicNodeStateScoring() {
    }
            : mDicNodeStateInput(), mDicNodeStateOutput(), mDicNodeStateScoring() {}

    ~DicNodeState() {}

    // Copy assignment: copies the contents of src into this object through the copy-initializer
    // and returns *this.
    DicNodeState &operator=(const DicNodeState& src) {
        init(&src);
        initByCopy(&src);
        return *this;
    }

    // Copy constructor: default-constructs the sub-states, then fills them from src through the
    // copy-initializer.
    DicNodeState(const DicNodeState& src)
            : mDicNodeStateInput(), mDicNodeStateOutput(), mDicNodeStatePrevWord(),
              mDicNodeStateScoring() {
        init(&src);
            : mDicNodeStateInput(), mDicNodeStateOutput(), mDicNodeStateScoring() {
        initByCopy(&src);
    }

    // Init with prevWordPos
    void init(const int prevWordPos) {
    // Init for root
    void init() {
        // Each sub-state is initialized through its own init().
        mDicNodeStateInput.init();
        mDicNodeStateOutput.init();
        mDicNodeStatePrevWord.init(prevWordPos);
        mDicNodeStateScoring.init();
    }

    // Init with previous word.
    // Derives the output, input and scoring sub-states from prevWordDicNodeState; the input
    // sub-state is initialized with resetTerminalDiffCost set to true.
    void initAsRootWithPreviousWord(const DicNodeState *prevWordDicNodeState,
            const int prevWordPos, const int prevWordCodePointCount) {
        mDicNodeStateOutput.init(); // reset for next word
            const int prevWordCodePointCount) {
        mDicNodeStateOutput.init(&prevWordDicNodeState->mDicNodeStateOutput);
        mDicNodeStateInput.init(
                &prevWordDicNodeState->mDicNodeStateInput, true /* resetTerminalDiffCost */);
        mDicNodeStateScoring.init(&prevWordDicNodeState->mDicNodeStateScoring);
        mDicNodeStatePrevWord.init(
                prevWordDicNodeState->mDicNodeStatePrevWord.getPrevWordCount() + 1,
                prevWordPos,
                prevWordDicNodeState->mDicNodeStatePrevWord.getPrevWordBuf(),
                prevWordDicNodeState->mDicNodeStatePrevWord.getPrevWordLength(),
                prevWordDicNodeState->mDicNodeStateOutput.getCodePointBuf(),
                prevWordCodePointCount,
                prevWordDicNodeState->mDicNodeStatePrevWord.getSecondWordFirstInputIndex(),
                prevWordDicNodeState->mDicNodeStateInput.getInputIndex(0) /* lastInputIndex */);
        mDicNodeStateScoring.initByCopy(&prevWordDicNodeState->mDicNodeStateScoring);
    }

    // Init by copy
    // Copies each sub-state of src into the corresponding sub-state of this object.
    AK_FORCE_INLINE void init(const DicNodeState *const src) {
        mDicNodeStateInput.init(&src->mDicNodeStateInput);
        mDicNodeStateOutput.init(&src->mDicNodeStateOutput);
        mDicNodeStatePrevWord.init(&src->mDicNodeStatePrevWord);
        mDicNodeStateScoring.init(&src->mDicNodeStateScoring);
    AK_FORCE_INLINE void initByCopy(const DicNodeState *const src) {
        mDicNodeStateInput.initByCopy(&src->mDicNodeStateInput);
        mDicNodeStateOutput.initByCopy(&src->mDicNodeStateOutput);
        mDicNodeStateScoring.initByCopy(&src->mDicNodeStateScoring);
    }

    // Init by copy and adding merged node code points.
    // First copies src, then passes mergedNodeCodePointCount and mergedNodeCodePoints to the
    // output sub-state's addMergedNodeCodePoints().
    void init(const DicNodeState *const src, const uint16_t mergedNodeCodePointCount,
            const int *const mergedNodeCodePoints) {
        init(src);
        initByCopy(src);
        mDicNodeStateOutput.addMergedNodeCodePoints(
                mergedNodeCodePointCount, mergedNodeCodePoints);
    }
+1 −1
Original line number Diff line number Diff line
@@ -53,7 +53,7 @@ class DicNodeStateInput {
        mTerminalDiffCost[pointerId] = terminalDiffCost;
    }

    // Copies src by delegating to the two-argument init() with false as the second argument.
    // NOTE(review): the second argument is presumably resetTerminalDiffCost - confirm against
    // the two-argument overload's declaration.
    void init(const DicNodeStateInput *const src) {
    void initByCopy(const DicNodeStateInput *const src) {
        init(src, false);
    }

+85 −13

File changed.

Preview size limit exceeded, changes collapsed.

Loading