Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 87a5c769 authored by Keisuke Kuroyanagi's avatar Keisuke Kuroyanagi
Browse files

Use WordAttributes for checking flags.

Bug: 14425059
Change-Id: Idee84478a482a0e7b5cc53e5dbd4e2484584ba79
parent 2111e3ab
Loading
Loading
Loading
Loading
+3 −10
Original line number Diff line number Diff line
@@ -136,14 +136,14 @@ class DicNode {
    }

    void initAsChild(const DicNode *const dicNode, const int childrenPtNodeArrayPos,
            const int unigramProbability, const int wordId, const bool isBlacklistedOrNotAWord,
            const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) {
            const int unigramProbability, const int wordId, const uint16_t mergedNodeCodePointCount,
            const int *const mergedNodeCodePoints) {
        uint16_t newDepth = static_cast<uint16_t>(dicNode->getNodeCodePointCount() + 1);
        mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
        const uint16_t newLeavingDepth = static_cast<uint16_t>(
                dicNode->mDicNodeProperties.getLeavingDepth() + mergedNodeCodePointCount);
        mDicNodeProperties.init(childrenPtNodeArrayPos, mergedNodeCodePoints[0],
                unigramProbability, wordId, isBlacklistedOrNotAWord, newDepth, newLeavingDepth,
                unigramProbability, wordId, newDepth, newLeavingDepth,
                dicNode->mDicNodeProperties.getPrevWordIds());
        mDicNodeState.init(&dicNode->mDicNodeState, mergedNodeCodePointCount,
                mergedNodeCodePoints);
@@ -178,9 +178,6 @@ class DicNode {
    // Check if the current word and the previous word can be considered as a valid multiple word
    // suggestion.
    bool isValidMultipleWordSuggestion() const {
        if (isBlacklistedOrNotAWord()) {
            return false;
        }
        // Treat suggestion as invalid if the current and the previous word are single character
        // words.
        const int prevWordLen = mDicNodeState.mDicNodeStateOutput.getPrevWordsLength()
@@ -404,10 +401,6 @@ class DicNode {
        return mDicNodeState.mDicNodeStateScoring.getContainedErrorTypes();
    }

    // Reports whether this node is flagged as blacklisted or as not being a word, by
    // forwarding to the flag held in mDicNodeProperties.
    bool isBlacklistedOrNotAWord() const {
        return mDicNodeProperties.isBlacklistedOrNotAWord();
    }

    // Returns this node's code point count, defined here as the depth recorded in
    // mDicNodeProperties.
    inline uint16_t getNodeCodePointCount() const {
        return mDicNodeProperties.getDepth();
    }
+4 −0
Original line number Diff line number Diff line
@@ -74,6 +74,10 @@ namespace latinime {
    }
    const WordAttributes wordAttributes = dictionaryStructurePolicy->getWordAttributesInContext(
            dicNode->getPrevWordIds(), dicNode->getWordId(), multiBigramMap);
    if (dicNode->hasMultipleWords()
            && (wordAttributes.isBlacklisted() || wordAttributes.isNotAWord())) {
        return static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
    }
    // TODO: This equation to calculate the improbability looks unreasonable.  Investigate this.
    const float cost = static_cast<float>(MAX_PROBABILITY - wordAttributes.getProbability())
            / static_cast<float>(MAX_PROBABILITY);
+2 −2
Original line number Diff line number Diff line
@@ -59,12 +59,12 @@ class DicNodeVector {
    }

    void pushLeavingChild(const DicNode *const dicNode, const int childrenPtNodeArrayPos,
            const int unigramProbability, const int wordId, const bool isBlacklistedOrNotAWord,
            const int unigramProbability, const int wordId,
            const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) {
        ASSERT(!mLock);
        mDicNodes.emplace_back();
        mDicNodes.back().initAsChild(dicNode, childrenPtNodeArrayPos, unigramProbability,
                wordId, isBlacklistedOrNotAWord, mergedNodeCodePointCount, mergedNodeCodePoints);
                wordId, mergedNodeCodePointCount, mergedNodeCodePoints);
    }

    DicNode *operator[](const int id) {
+4 −14
Original line number Diff line number Diff line
@@ -30,20 +30,19 @@ class DicNodeProperties {
 public:
    AK_FORCE_INLINE DicNodeProperties()
            : mChildrenPtNodeArrayPos(NOT_A_DICT_POS), mUnigramProbability(NOT_A_PROBABILITY),
              mDicNodeCodePoint(NOT_A_CODE_POINT), mWordId(NOT_A_WORD_ID),
              mIsBlacklistedOrNotAWord(false), mDepth(0), mLeavingDepth(0) {}
              mDicNodeCodePoint(NOT_A_CODE_POINT), mWordId(NOT_A_WORD_ID), mDepth(0),
              mLeavingDepth(0) {}

    ~DicNodeProperties() {}

    // Should be called only once per DicNode initialization.
    void init(const int childrenPos, const int nodeCodePoint, const int unigramProbability,
            const int wordId, const bool isBlacklistedOrNotAWord, const uint16_t depth,
            const uint16_t leavingDepth, const int *const prevWordIds) {
            const int wordId, const uint16_t depth, const uint16_t leavingDepth,
            const int *const prevWordIds) {
        mChildrenPtNodeArrayPos = childrenPos;
        mDicNodeCodePoint = nodeCodePoint;
        mUnigramProbability = unigramProbability;
        mWordId = wordId;
        mIsBlacklistedOrNotAWord = isBlacklistedOrNotAWord;
        mDepth = depth;
        mLeavingDepth = leavingDepth;
        memmove(mPrevWordIds, prevWordIds, sizeof(mPrevWordIds));
@@ -55,7 +54,6 @@ class DicNodeProperties {
        mDicNodeCodePoint = NOT_A_CODE_POINT;
        mUnigramProbability = NOT_A_PROBABILITY;
        mWordId = NOT_A_WORD_ID;
        mIsBlacklistedOrNotAWord = false;
        mDepth = 0;
        mLeavingDepth = 0;
        memmove(mPrevWordIds, prevWordIds, sizeof(mPrevWordIds));
@@ -66,7 +64,6 @@ class DicNodeProperties {
        mDicNodeCodePoint = dicNodeProp->mDicNodeCodePoint;
        mUnigramProbability = dicNodeProp->mUnigramProbability;
        mWordId = dicNodeProp->mWordId;
        mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord;
        mDepth = dicNodeProp->mDepth;
        mLeavingDepth = dicNodeProp->mLeavingDepth;
        memmove(mPrevWordIds, dicNodeProp->mPrevWordIds, sizeof(mPrevWordIds));
@@ -78,7 +75,6 @@ class DicNodeProperties {
        mDicNodeCodePoint = codePoint; // Overwrite the node char of a passing child
        mUnigramProbability = dicNodeProp->mUnigramProbability;
        mWordId = dicNodeProp->mWordId;
        mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord;
        mDepth = dicNodeProp->mDepth + 1; // Increment the depth of a passing child
        mLeavingDepth = dicNodeProp->mLeavingDepth;
        memmove(mPrevWordIds, dicNodeProp->mPrevWordIds, sizeof(mPrevWordIds));
@@ -113,10 +109,6 @@ class DicNodeProperties {
        return (mChildrenPtNodeArrayPos != NOT_A_DICT_POS) || mDepth != mLeavingDepth;
    }

    // Returns the stored mIsBlacklistedOrNotAWord flag.
    bool isBlacklistedOrNotAWord() const {
        return mIsBlacklistedOrNotAWord;
    }

    // Returns a pointer to this object's own mPrevWordIds array (not a copy); the ids
    // cannot be modified through the returned pointer.
    const int *getPrevWordIds() const {
        return mPrevWordIds;
    }
@@ -134,8 +126,6 @@ class DicNodeProperties {
    int mUnigramProbability;
    int mDicNodeCodePoint;
    int mWordId;
    // TODO: Remove
    bool mIsBlacklistedOrNotAWord;
    uint16_t mDepth;
    uint16_t mLeavingDepth;
    int mPrevWordIds[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
+7 −7
Original line number Diff line number Diff line
@@ -85,9 +85,9 @@ const int SuggestionsOutputUtils::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16;
            scoringPolicy->getDoubleLetterDemotionDistanceCost(terminalDicNode);
    const float compoundDistance = terminalDicNode->getCompoundDistance(languageWeight)
            + doubleLetterCost;
    const bool isPossiblyOffensiveWord =
            traverseSession->getDictionaryStructurePolicy()->getProbability(
                    terminalDicNode->getUnigramProbability(), NOT_A_PROBABILITY) <= 0;
    const WordAttributes wordAttributes = traverseSession->getDictionaryStructurePolicy()
            ->getWordAttributesInContext(terminalDicNode->getPrevWordIds(),
                    terminalDicNode->getWordId(), nullptr /* multiBigramMap */);
    const bool isExactMatch =
            ErrorTypeUtils::isExactMatch(terminalDicNode->getContainedErrorTypes());
    const bool isExactMatchWithIntentionalOmission =
@@ -97,19 +97,19 @@ const int SuggestionsOutputUtils::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16;
    // Heuristic: We exclude probability=0 first-char-uppercase words from exact match.
    // (e.g. "AMD" and "and")
    const bool isSafeExactMatch = isExactMatch
            && !(isPossiblyOffensiveWord && isFirstCharUppercase);
            && !(wordAttributes.isPossiblyOffensive() && isFirstCharUppercase);
    const int outputTypeFlags =
            (isPossiblyOffensiveWord ? Dictionary::KIND_FLAG_POSSIBLY_OFFENSIVE : 0)
            (wordAttributes.isPossiblyOffensive() ? Dictionary::KIND_FLAG_POSSIBLY_OFFENSIVE : 0)
            | ((isSafeExactMatch && boostExactMatches) ? Dictionary::KIND_FLAG_EXACT_MATCH : 0)
            | (isExactMatchWithIntentionalOmission ?
                    Dictionary::KIND_FLAG_EXACT_MATCH_WITH_INTENTIONAL_OMISSION : 0);

    // Entries that are blacklisted or do not represent a word should not be output.
    const bool isValidWord = !terminalDicNode->isBlacklistedOrNotAWord();
    const bool isValidWord = !(wordAttributes.isBlacklisted() || wordAttributes.isNotAWord());
    // When we have to block offensive words, non-exact matched offensive words should not be
    // output.
    const bool blockOffensiveWords = traverseSession->getSuggestOptions()->blockOffensiveWords();
    const bool isBlockedOffensiveWord = blockOffensiveWords && isPossiblyOffensiveWord
    const bool isBlockedOffensiveWord = blockOffensiveWords && wordAttributes.isPossiblyOffensive()
            && !isSafeExactMatch;

    // Increase output score of top typing suggestion to ensure autocorrection.
Loading