Loading native/jni/src/suggest/core/dicnode/dic_node.h +3 −10 Original line number Diff line number Diff line Loading @@ -136,14 +136,14 @@ class DicNode { } void initAsChild(const DicNode *const dicNode, const int childrenPtNodeArrayPos, const int unigramProbability, const int wordId, const bool isBlacklistedOrNotAWord, const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) { const int unigramProbability, const int wordId, const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) { uint16_t newDepth = static_cast<uint16_t>(dicNode->getNodeCodePointCount() + 1); mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion; const uint16_t newLeavingDepth = static_cast<uint16_t>( dicNode->mDicNodeProperties.getLeavingDepth() + mergedNodeCodePointCount); mDicNodeProperties.init(childrenPtNodeArrayPos, mergedNodeCodePoints[0], unigramProbability, wordId, isBlacklistedOrNotAWord, newDepth, newLeavingDepth, unigramProbability, wordId, newDepth, newLeavingDepth, dicNode->mDicNodeProperties.getPrevWordIds()); mDicNodeState.init(&dicNode->mDicNodeState, mergedNodeCodePointCount, mergedNodeCodePoints); Loading Loading @@ -178,9 +178,6 @@ class DicNode { // Check if the current word and the previous word can be considered as a valid multiple word // suggestion. bool isValidMultipleWordSuggestion() const { if (isBlacklistedOrNotAWord()) { return false; } // Treat suggestion as invalid if the current and the previous word are single character // words. const int prevWordLen = mDicNodeState.mDicNodeStateOutput.getPrevWordsLength() Loading Loading @@ -404,10 +401,6 @@ class DicNode { return mDicNodeState.mDicNodeStateScoring.getContainedErrorTypes(); } bool isBlacklistedOrNotAWord() const { return mDicNodeProperties.isBlacklistedOrNotAWord(); } inline uint16_t getNodeCodePointCount() const { return mDicNodeProperties.getDepth(); } Loading native/jni/src/suggest/core/dicnode/dic_node_utils.cpp +4 −0 Original line number Diff line number Diff line Loading @@ -74,6 +74,10 @@ namespace latinime { } const WordAttributes wordAttributes = dictionaryStructurePolicy->getWordAttributesInContext( dicNode->getPrevWordIds(), dicNode->getWordId(), multiBigramMap); if (dicNode->hasMultipleWords() && (wordAttributes.isBlacklisted() || wordAttributes.isNotAWord())) { return static_cast<float>(MAX_VALUE_FOR_WEIGHTING); } // TODO: This equation to calculate the improbability looks unreasonable. Investigate this. const float cost = static_cast<float>(MAX_PROBABILITY - wordAttributes.getProbability()) / static_cast<float>(MAX_PROBABILITY); Loading native/jni/src/suggest/core/dicnode/dic_node_vector.h +2 −2 Original line number Diff line number Diff line Loading @@ -59,12 +59,12 @@ class DicNodeVector { } void pushLeavingChild(const DicNode *const dicNode, const int childrenPtNodeArrayPos, const int unigramProbability, const int wordId, const bool isBlacklistedOrNotAWord, const int unigramProbability, const int wordId, const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) { ASSERT(!mLock); mDicNodes.emplace_back(); mDicNodes.back().initAsChild(dicNode, childrenPtNodeArrayPos, unigramProbability, wordId, isBlacklistedOrNotAWord, mergedNodeCodePointCount, mergedNodeCodePoints); wordId, mergedNodeCodePointCount, mergedNodeCodePoints); } DicNode *operator[](const int id) { Loading native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h +4 −14 Original line number Diff line number Diff line Loading @@ -30,20 +30,19 @@ class DicNodeProperties { public: AK_FORCE_INLINE DicNodeProperties() : mChildrenPtNodeArrayPos(NOT_A_DICT_POS), mUnigramProbability(NOT_A_PROBABILITY), mDicNodeCodePoint(NOT_A_CODE_POINT), mWordId(NOT_A_WORD_ID), mIsBlacklistedOrNotAWord(false), mDepth(0), mLeavingDepth(0) {} mDicNodeCodePoint(NOT_A_CODE_POINT), mWordId(NOT_A_WORD_ID), mDepth(0), mLeavingDepth(0) {} ~DicNodeProperties() {} // Should be called only once per DicNode is initialized. void init(const int childrenPos, const int nodeCodePoint, const int unigramProbability, const int wordId, const bool isBlacklistedOrNotAWord, const uint16_t depth, const uint16_t leavingDepth, const int *const prevWordIds) { const int wordId, const uint16_t depth, const uint16_t leavingDepth, const int *const prevWordIds) { mChildrenPtNodeArrayPos = childrenPos; mDicNodeCodePoint = nodeCodePoint; mUnigramProbability = unigramProbability; mWordId = wordId; mIsBlacklistedOrNotAWord = isBlacklistedOrNotAWord; mDepth = depth; mLeavingDepth = leavingDepth; memmove(mPrevWordIds, prevWordIds, sizeof(mPrevWordIds)); Loading @@ -55,7 +54,6 @@ class DicNodeProperties { mDicNodeCodePoint = NOT_A_CODE_POINT; mUnigramProbability = NOT_A_PROBABILITY; mWordId = NOT_A_WORD_ID; mIsBlacklistedOrNotAWord = false; mDepth = 0; mLeavingDepth = 0; memmove(mPrevWordIds, prevWordIds, sizeof(mPrevWordIds)); Loading @@ -66,7 +64,6 @@ class DicNodeProperties { mDicNodeCodePoint = dicNodeProp->mDicNodeCodePoint; mUnigramProbability = dicNodeProp->mUnigramProbability; mWordId = dicNodeProp->mWordId; mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord; mDepth = dicNodeProp->mDepth; mLeavingDepth = dicNodeProp->mLeavingDepth; memmove(mPrevWordIds, dicNodeProp->mPrevWordIds, sizeof(mPrevWordIds)); Loading @@ -78,7 +75,6 @@ class DicNodeProperties { mDicNodeCodePoint = codePoint; // Overwrite the node char of a passing child mUnigramProbability = dicNodeProp->mUnigramProbability; mWordId = dicNodeProp->mWordId; mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord; mDepth = dicNodeProp->mDepth + 1; // Increment the depth of a passing child mLeavingDepth = dicNodeProp->mLeavingDepth; memmove(mPrevWordIds, dicNodeProp->mPrevWordIds, sizeof(mPrevWordIds)); Loading Loading @@ -113,10 +109,6 @@ class DicNodeProperties { return (mChildrenPtNodeArrayPos != NOT_A_DICT_POS) || mDepth != mLeavingDepth; } bool isBlacklistedOrNotAWord() const { return mIsBlacklistedOrNotAWord; } const int *getPrevWordIds() const { return mPrevWordIds; } Loading @@ -134,8 +126,6 @@ class DicNodeProperties { int mUnigramProbability; int mDicNodeCodePoint; int mWordId; // TODO: Remove bool mIsBlacklistedOrNotAWord; uint16_t mDepth; uint16_t mLeavingDepth; int mPrevWordIds[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; Loading native/jni/src/suggest/core/result/suggestions_output_utils.cpp +7 −7 Original line number Diff line number Diff line Loading @@ -85,9 +85,9 @@ const int SuggestionsOutputUtils::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16; scoringPolicy->getDoubleLetterDemotionDistanceCost(terminalDicNode); const float compoundDistance = terminalDicNode->getCompoundDistance(languageWeight) + doubleLetterCost; const bool isPossiblyOffensiveWord = traverseSession->getDictionaryStructurePolicy()->getProbability( terminalDicNode->getUnigramProbability(), NOT_A_PROBABILITY) <= 0; const WordAttributes wordAttributes = traverseSession->getDictionaryStructurePolicy() ->getWordAttributesInContext(terminalDicNode->getPrevWordIds(), terminalDicNode->getWordId(), nullptr /* multiBigramMap */); const bool isExactMatch = ErrorTypeUtils::isExactMatch(terminalDicNode->getContainedErrorTypes()); const bool isExactMatchWithIntentionalOmission = Loading @@ -97,19 +97,19 @@ const int SuggestionsOutputUtils::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16; // Heuristic: We exclude probability=0 first-char-uppercase words from exact match. // (e.g. "AMD" and "and") const bool isSafeExactMatch = isExactMatch && !(isPossiblyOffensiveWord && isFirstCharUppercase); && !(wordAttributes.isPossiblyOffensive() && isFirstCharUppercase); const int outputTypeFlags = (isPossiblyOffensiveWord ? Dictionary::KIND_FLAG_POSSIBLY_OFFENSIVE : 0) (wordAttributes.isPossiblyOffensive() ? Dictionary::KIND_FLAG_POSSIBLY_OFFENSIVE : 0) | ((isSafeExactMatch && boostExactMatches) ? Dictionary::KIND_FLAG_EXACT_MATCH : 0) | (isExactMatchWithIntentionalOmission ? Dictionary::KIND_FLAG_EXACT_MATCH_WITH_INTENTIONAL_OMISSION : 0); // Entries that are blacklisted or do not represent a word should not be output. const bool isValidWord = !terminalDicNode->isBlacklistedOrNotAWord(); const bool isValidWord = !(wordAttributes.isBlacklisted() || wordAttributes.isNotAWord()); // When we have to block offensive words, non-exact matched offensive words should not be // output. const bool blockOffensiveWords = traverseSession->getSuggestOptions()->blockOffensiveWords(); const bool isBlockedOffensiveWord = blockOffensiveWords && isPossiblyOffensiveWord const bool isBlockedOffensiveWord = blockOffensiveWords && wordAttributes.isPossiblyOffensive() && !isSafeExactMatch; // Increase output score of top typing suggestion to ensure autocorrection. Loading Loading
native/jni/src/suggest/core/dicnode/dic_node.h +3 −10 Original line number Diff line number Diff line Loading @@ -136,14 +136,14 @@ class DicNode { } void initAsChild(const DicNode *const dicNode, const int childrenPtNodeArrayPos, const int unigramProbability, const int wordId, const bool isBlacklistedOrNotAWord, const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) { const int unigramProbability, const int wordId, const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) { uint16_t newDepth = static_cast<uint16_t>(dicNode->getNodeCodePointCount() + 1); mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion; const uint16_t newLeavingDepth = static_cast<uint16_t>( dicNode->mDicNodeProperties.getLeavingDepth() + mergedNodeCodePointCount); mDicNodeProperties.init(childrenPtNodeArrayPos, mergedNodeCodePoints[0], unigramProbability, wordId, isBlacklistedOrNotAWord, newDepth, newLeavingDepth, unigramProbability, wordId, newDepth, newLeavingDepth, dicNode->mDicNodeProperties.getPrevWordIds()); mDicNodeState.init(&dicNode->mDicNodeState, mergedNodeCodePointCount, mergedNodeCodePoints); Loading Loading @@ -178,9 +178,6 @@ class DicNode { // Check if the current word and the previous word can be considered as a valid multiple word // suggestion. bool isValidMultipleWordSuggestion() const { if (isBlacklistedOrNotAWord()) { return false; } // Treat suggestion as invalid if the current and the previous word are single character // words. const int prevWordLen = mDicNodeState.mDicNodeStateOutput.getPrevWordsLength() Loading Loading @@ -404,10 +401,6 @@ class DicNode { return mDicNodeState.mDicNodeStateScoring.getContainedErrorTypes(); } bool isBlacklistedOrNotAWord() const { return mDicNodeProperties.isBlacklistedOrNotAWord(); } inline uint16_t getNodeCodePointCount() const { return mDicNodeProperties.getDepth(); } Loading
native/jni/src/suggest/core/dicnode/dic_node_utils.cpp +4 −0 Original line number Diff line number Diff line Loading @@ -74,6 +74,10 @@ namespace latinime { } const WordAttributes wordAttributes = dictionaryStructurePolicy->getWordAttributesInContext( dicNode->getPrevWordIds(), dicNode->getWordId(), multiBigramMap); if (dicNode->hasMultipleWords() && (wordAttributes.isBlacklisted() || wordAttributes.isNotAWord())) { return static_cast<float>(MAX_VALUE_FOR_WEIGHTING); } // TODO: This equation to calculate the improbability looks unreasonable. Investigate this. const float cost = static_cast<float>(MAX_PROBABILITY - wordAttributes.getProbability()) / static_cast<float>(MAX_PROBABILITY); Loading
native/jni/src/suggest/core/dicnode/dic_node_vector.h +2 −2 Original line number Diff line number Diff line Loading @@ -59,12 +59,12 @@ class DicNodeVector { } void pushLeavingChild(const DicNode *const dicNode, const int childrenPtNodeArrayPos, const int unigramProbability, const int wordId, const bool isBlacklistedOrNotAWord, const int unigramProbability, const int wordId, const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) { ASSERT(!mLock); mDicNodes.emplace_back(); mDicNodes.back().initAsChild(dicNode, childrenPtNodeArrayPos, unigramProbability, wordId, isBlacklistedOrNotAWord, mergedNodeCodePointCount, mergedNodeCodePoints); wordId, mergedNodeCodePointCount, mergedNodeCodePoints); } DicNode *operator[](const int id) { Loading
native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h +4 −14 Original line number Diff line number Diff line Loading @@ -30,20 +30,19 @@ class DicNodeProperties { public: AK_FORCE_INLINE DicNodeProperties() : mChildrenPtNodeArrayPos(NOT_A_DICT_POS), mUnigramProbability(NOT_A_PROBABILITY), mDicNodeCodePoint(NOT_A_CODE_POINT), mWordId(NOT_A_WORD_ID), mIsBlacklistedOrNotAWord(false), mDepth(0), mLeavingDepth(0) {} mDicNodeCodePoint(NOT_A_CODE_POINT), mWordId(NOT_A_WORD_ID), mDepth(0), mLeavingDepth(0) {} ~DicNodeProperties() {} // Should be called only once per DicNode is initialized. void init(const int childrenPos, const int nodeCodePoint, const int unigramProbability, const int wordId, const bool isBlacklistedOrNotAWord, const uint16_t depth, const uint16_t leavingDepth, const int *const prevWordIds) { const int wordId, const uint16_t depth, const uint16_t leavingDepth, const int *const prevWordIds) { mChildrenPtNodeArrayPos = childrenPos; mDicNodeCodePoint = nodeCodePoint; mUnigramProbability = unigramProbability; mWordId = wordId; mIsBlacklistedOrNotAWord = isBlacklistedOrNotAWord; mDepth = depth; mLeavingDepth = leavingDepth; memmove(mPrevWordIds, prevWordIds, sizeof(mPrevWordIds)); Loading @@ -55,7 +54,6 @@ class DicNodeProperties { mDicNodeCodePoint = NOT_A_CODE_POINT; mUnigramProbability = NOT_A_PROBABILITY; mWordId = NOT_A_WORD_ID; mIsBlacklistedOrNotAWord = false; mDepth = 0; mLeavingDepth = 0; memmove(mPrevWordIds, prevWordIds, sizeof(mPrevWordIds)); Loading @@ -66,7 +64,6 @@ class DicNodeProperties { mDicNodeCodePoint = dicNodeProp->mDicNodeCodePoint; mUnigramProbability = dicNodeProp->mUnigramProbability; mWordId = dicNodeProp->mWordId; mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord; mDepth = dicNodeProp->mDepth; mLeavingDepth = dicNodeProp->mLeavingDepth; memmove(mPrevWordIds, dicNodeProp->mPrevWordIds, sizeof(mPrevWordIds)); Loading @@ -78,7 +75,6 @@ class DicNodeProperties { mDicNodeCodePoint = codePoint; // Overwrite the node char of a passing child mUnigramProbability = dicNodeProp->mUnigramProbability; mWordId = dicNodeProp->mWordId; mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord; mDepth = dicNodeProp->mDepth + 1; // Increment the depth of a passing child mLeavingDepth = dicNodeProp->mLeavingDepth; memmove(mPrevWordIds, dicNodeProp->mPrevWordIds, sizeof(mPrevWordIds)); Loading Loading @@ -113,10 +109,6 @@ class DicNodeProperties { return (mChildrenPtNodeArrayPos != NOT_A_DICT_POS) || mDepth != mLeavingDepth; } bool isBlacklistedOrNotAWord() const { return mIsBlacklistedOrNotAWord; } const int *getPrevWordIds() const { return mPrevWordIds; } Loading @@ -134,8 +126,6 @@ class DicNodeProperties { int mUnigramProbability; int mDicNodeCodePoint; int mWordId; // TODO: Remove bool mIsBlacklistedOrNotAWord; uint16_t mDepth; uint16_t mLeavingDepth; int mPrevWordIds[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; Loading
native/jni/src/suggest/core/result/suggestions_output_utils.cpp +7 −7 Original line number Diff line number Diff line Loading @@ -85,9 +85,9 @@ const int SuggestionsOutputUtils::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16; scoringPolicy->getDoubleLetterDemotionDistanceCost(terminalDicNode); const float compoundDistance = terminalDicNode->getCompoundDistance(languageWeight) + doubleLetterCost; const bool isPossiblyOffensiveWord = traverseSession->getDictionaryStructurePolicy()->getProbability( terminalDicNode->getUnigramProbability(), NOT_A_PROBABILITY) <= 0; const WordAttributes wordAttributes = traverseSession->getDictionaryStructurePolicy() ->getWordAttributesInContext(terminalDicNode->getPrevWordIds(), terminalDicNode->getWordId(), nullptr /* multiBigramMap */); const bool isExactMatch = ErrorTypeUtils::isExactMatch(terminalDicNode->getContainedErrorTypes()); const bool isExactMatchWithIntentionalOmission = Loading @@ -97,19 +97,19 @@ const int SuggestionsOutputUtils::MIN_LEN_FOR_MULTI_WORD_AUTOCORRECT = 16; // Heuristic: We exclude probability=0 first-char-uppercase words from exact match. // (e.g. "AMD" and "and") const bool isSafeExactMatch = isExactMatch && !(isPossiblyOffensiveWord && isFirstCharUppercase); && !(wordAttributes.isPossiblyOffensive() && isFirstCharUppercase); const int outputTypeFlags = (isPossiblyOffensiveWord ? Dictionary::KIND_FLAG_POSSIBLY_OFFENSIVE : 0) (wordAttributes.isPossiblyOffensive() ? Dictionary::KIND_FLAG_POSSIBLY_OFFENSIVE : 0) | ((isSafeExactMatch && boostExactMatches) ? Dictionary::KIND_FLAG_EXACT_MATCH : 0) | (isExactMatchWithIntentionalOmission ? Dictionary::KIND_FLAG_EXACT_MATCH_WITH_INTENTIONAL_OMISSION : 0); // Entries that are blacklisted or do not represent a word should not be output. const bool isValidWord = !terminalDicNode->isBlacklistedOrNotAWord(); const bool isValidWord = !(wordAttributes.isBlacklisted() || wordAttributes.isNotAWord()); // When we have to block offensive words, non-exact matched offensive words should not be // output. const bool blockOffensiveWords = traverseSession->getSuggestOptions()->blockOffensiveWords(); const bool isBlockedOffensiveWord = blockOffensiveWords && isPossiblyOffensiveWord const bool isBlockedOffensiveWord = blockOffensiveWords && wordAttributes.isPossiblyOffensive() && !isSafeExactMatch; // Increase output score of top typing suggestion to ensure autocorrection. Loading