Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit ae41d359 authored by Keisuke Kuroyanagi's avatar Keisuke Kuroyanagi Committed by Android (Google) Code Review
Browse files

Merge "Use WordAttributes for checking flags."

parents 6443d799 87a5c769
Loading
Loading
Loading
Loading
+3 −10
Original line number Diff line number Diff line
@@ -136,14 +136,14 @@ class DicNode {
    }

    void initAsChild(const DicNode *const dicNode, const int childrenPtNodeArrayPos,
            const int unigramProbability, const int wordId, const bool isBlacklistedOrNotAWord,
            const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) {
            const int unigramProbability, const int wordId, const uint16_t mergedNodeCodePointCount,
            const int *const mergedNodeCodePoints) {
        uint16_t newDepth = static_cast<uint16_t>(dicNode->getNodeCodePointCount() + 1);
        mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
        const uint16_t newLeavingDepth = static_cast<uint16_t>(
                dicNode->mDicNodeProperties.getLeavingDepth() + mergedNodeCodePointCount);
        mDicNodeProperties.init(childrenPtNodeArrayPos, mergedNodeCodePoints[0],
                unigramProbability, wordId, isBlacklistedOrNotAWord, newDepth, newLeavingDepth,
                unigramProbability, wordId, newDepth, newLeavingDepth,
                dicNode->mDicNodeProperties.getPrevWordIds());
        mDicNodeState.init(&dicNode->mDicNodeState, mergedNodeCodePointCount,
                mergedNodeCodePoints);
@@ -178,9 +178,6 @@ class DicNode {
    // Check if the current word and the previous word can be considered as a valid multiple word
    // suggestion.
    bool isValidMultipleWordSuggestion() const {
        if (isBlacklistedOrNotAWord()) {
            return false;
        }
        // Treat suggestion as invalid if the current and the previous word are single character
        // words.
        const int prevWordLen = mDicNodeState.mDicNodeStateOutput.getPrevWordsLength()
@@ -404,10 +401,6 @@ class DicNode {
        return mDicNodeState.mDicNodeStateScoring.getContainedErrorTypes();
    }

    // Reports the blacklisted-or-not-a-word flag for this node by forwarding to
    // mDicNodeProperties.
    bool isBlacklistedOrNotAWord() const {
        return mDicNodeProperties.isBlacklistedOrNotAWord();
    }

    // Returns this node's code point count, which is the depth value held by
    // mDicNodeProperties.
    inline uint16_t getNodeCodePointCount() const {
        return mDicNodeProperties.getDepth();
    }
+4 −0
Original line number Diff line number Diff line
@@ -74,6 +74,10 @@ namespace latinime {
    }
    const WordAttributes wordAttributes = dictionaryStructurePolicy->getWordAttributesInContext(
            dicNode->getPrevWordIds(), dicNode->getWordId(), multiBigramMap);
    if (dicNode->hasMultipleWords()
            && (wordAttributes.isBlacklisted() || wordAttributes.isNotAWord())) {
        return static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
    }
    // TODO: This equation to calculate the improbability looks unreasonable.  Investigate this.
    const float cost = static_cast<float>(MAX_PROBABILITY - wordAttributes.getProbability())
            / static_cast<float>(MAX_PROBABILITY);
+2 −2
Original line number Diff line number Diff line
@@ -59,12 +59,12 @@ class DicNodeVector {
    }

    // Appends a default-constructed DicNode to mDicNodes and initializes it in place as a child
    // of dicNode through DicNode::initAsChild(), forwarding the remaining arguments unchanged.
    // Asserts that the vector is not locked (mLock is false) before modifying it.
    // NOTE(review): part of the parameter list and of the initAsChild() argument list appear
    // twice below, once with and once without isBlacklistedOrNotAWord -- this looks like the
    // before/after sides of a diff hunk shown together; confirm which variant is current.
    void pushLeavingChild(const DicNode *const dicNode, const int childrenPtNodeArrayPos,
            const int unigramProbability, const int wordId, const bool isBlacklistedOrNotAWord,
            const int unigramProbability, const int wordId,
            const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) {
        ASSERT(!mLock);
        mDicNodes.emplace_back();
        mDicNodes.back().initAsChild(dicNode, childrenPtNodeArrayPos, unigramProbability,
                wordId, isBlacklistedOrNotAWord, mergedNodeCodePointCount, mergedNodeCodePoints);
                wordId, mergedNodeCodePointCount, mergedNodeCodePoints);
    }

    DicNode *operator[](const int id) {
+4 −14
Original line number Diff line number Diff line
@@ -30,20 +30,19 @@ class DicNodeProperties {
 public:
    // Default constructor. Sets the children position, unigram probability, code point and word
    // id to their NOT_A_* sentinel values and both depth fields to 0. mPrevWordIds is not
    // initialized in this initializer list.
    // NOTE(review): the tail of the initializer list appears twice below, once with and once
    // without mIsBlacklistedOrNotAWord -- this looks like the before/after sides of a diff hunk
    // shown together; confirm which variant is current.
    AK_FORCE_INLINE DicNodeProperties()
            : mChildrenPtNodeArrayPos(NOT_A_DICT_POS), mUnigramProbability(NOT_A_PROBABILITY),
              mDicNodeCodePoint(NOT_A_CODE_POINT), mWordId(NOT_A_WORD_ID),
              mIsBlacklistedOrNotAWord(false), mDepth(0), mLeavingDepth(0) {}
              mDicNodeCodePoint(NOT_A_CODE_POINT), mWordId(NOT_A_WORD_ID), mDepth(0),
              mLeavingDepth(0) {}

    // Empty destructor body; no explicit cleanup is performed here.
    ~DicNodeProperties() {}

    // Should be called only once each time a DicNode is initialized.
    void init(const int childrenPos, const int nodeCodePoint, const int unigramProbability,
            const int wordId, const bool isBlacklistedOrNotAWord, const uint16_t depth,
            const uint16_t leavingDepth, const int *const prevWordIds) {
            const int wordId, const uint16_t depth, const uint16_t leavingDepth,
            const int *const prevWordIds) {
        mChildrenPtNodeArrayPos = childrenPos;
        mDicNodeCodePoint = nodeCodePoint;
        mUnigramProbability = unigramProbability;
        mWordId = wordId;
        mIsBlacklistedOrNotAWord = isBlacklistedOrNotAWord;
        mDepth = depth;
        mLeavingDepth = leavingDepth;
        memmove(mPrevWordIds, prevWordIds, sizeof(mPrevWordIds));
@@ -55,7 +54,6 @@ class DicNodeProperties {
        mDicNodeCodePoint = NOT_A_CODE_POINT;
        mUnigramProbability = NOT_A_PROBABILITY;
        mWordId = NOT_A_WORD_ID;
        mIsBlacklistedOrNotAWord = false;
        mDepth = 0;
        mLeavingDepth = 0;
        memmove(mPrevWordIds, prevWordIds, sizeof(mPrevWordIds));
@@ -66,7 +64,6 @@ class DicNodeProperties {
        mDicNodeCodePoint = dicNodeProp->mDicNodeCodePoint;
        mUnigramProbability = dicNodeProp->mUnigramProbability;
        mWordId = dicNodeProp->mWordId;
        mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord;
        mDepth = dicNodeProp->mDepth;
        mLeavingDepth = dicNodeProp->mLeavingDepth;
        memmove(mPrevWordIds, dicNodeProp->mPrevWordIds, sizeof(mPrevWordIds));
@@ -78,7 +75,6 @@ class DicNodeProperties {
        mDicNodeCodePoint = codePoint; // Overwrite the node char of a passing child
        mUnigramProbability = dicNodeProp->mUnigramProbability;
        mWordId = dicNodeProp->mWordId;
        mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord;
        mDepth = dicNodeProp->mDepth + 1; // Increment the depth of a passing child
        mLeavingDepth = dicNodeProp->mLeavingDepth;
        memmove(mPrevWordIds, dicNodeProp->mPrevWordIds, sizeof(mPrevWordIds));
@@ -113,10 +109,6 @@ class DicNodeProperties {
        return (mChildrenPtNodeArrayPos != NOT_A_DICT_POS) || mDepth != mLeavingDepth;
    }

    // Returns the stored mIsBlacklistedOrNotAWord flag.
    bool isBlacklistedOrNotAWord() const {
        return mIsBlacklistedOrNotAWord;
    }

    // Returns a read-only pointer to the mPrevWordIds member array
    // (MAX_PREV_WORD_COUNT_FOR_N_GRAM entries). The pointer refers to storage inside this
    // object.
    const int *getPrevWordIds() const {
        return mPrevWordIds;
    }
@@ -134,8 +126,6 @@ class DicNodeProperties {
    int mUnigramProbability;
    int mDicNodeCodePoint;
    int mWordId;
    // TODO: Remove
    bool mIsBlacklistedOrNotAWord;
    uint16_t mDepth;
    uint16_t mLeavingDepth;
    int mPrevWordIds[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
+7 −7
Original line number Diff line number Diff line
@@ -85,9 +85,9 @@ const int SuggestionsOutputUtils::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16;
            scoringPolicy->getDoubleLetterDemotionDistanceCost(terminalDicNode);
    const float compoundDistance = terminalDicNode->getCompoundDistance(languageWeight)
            + doubleLetterCost;
    const bool isPossiblyOffensiveWord =
            traverseSession->getDictionaryStructurePolicy()->getProbability(
                    terminalDicNode->getUnigramProbability(), NOT_A_PROBABILITY) <= 0;
    const WordAttributes wordAttributes = traverseSession->getDictionaryStructurePolicy()
            ->getWordAttributesInContext(terminalDicNode->getPrevWordIds(),
                    terminalDicNode->getWordId(), nullptr /* multiBigramMap */);
    const bool isExactMatch =
            ErrorTypeUtils::isExactMatch(terminalDicNode->getContainedErrorTypes());
    const bool isExactMatchWithIntentionalOmission =
@@ -97,19 +97,19 @@ const int SuggestionsOutputUtils::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16;
    // Heuristic: We exclude probability=0 first-char-uppercase words from exact match.
    // (e.g. "AMD" and "and")
    const bool isSafeExactMatch = isExactMatch
            && !(isPossiblyOffensiveWord && isFirstCharUppercase);
            && !(wordAttributes.isPossiblyOffensive() && isFirstCharUppercase);
    const int outputTypeFlags =
            (isPossiblyOffensiveWord ? Dictionary::KIND_FLAG_POSSIBLY_OFFENSIVE : 0)
            (wordAttributes.isPossiblyOffensive() ? Dictionary::KIND_FLAG_POSSIBLY_OFFENSIVE : 0)
            | ((isSafeExactMatch && boostExactMatches) ? Dictionary::KIND_FLAG_EXACT_MATCH : 0)
            | (isExactMatchWithIntentionalOmission ?
                    Dictionary::KIND_FLAG_EXACT_MATCH_WITH_INTENTIONAL_OMISSION : 0);

    // Entries that are blacklisted or do not represent a word should not be output.
    const bool isValidWord = !terminalDicNode->isBlacklistedOrNotAWord();
    const bool isValidWord = !(wordAttributes.isBlacklisted() || wordAttributes.isNotAWord());
    // When we have to block offensive words, non-exact matched offensive words should not be
    // output.
    const bool blockOffensiveWords = traverseSession->getSuggestOptions()->blockOffensiveWords();
    const bool isBlockedOffensiveWord = blockOffensiveWords && isPossiblyOffensiveWord
    const bool isBlockedOffensiveWord = blockOffensiveWords && wordAttributes.isPossiblyOffensive()
            && !isSafeExactMatch;

    // Increase output score of top typing suggestion to ensure autocorrection.
Loading