Loading native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h +5 −0 Original line number Diff line number Diff line Loading @@ -23,6 +23,7 @@ #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h" #include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" #include "utils/char_utils.h" namespace latinime { Loading Loading @@ -158,6 +159,10 @@ class PtNodeParams { return PatriciaTrieReadingUtils::hasShortcutTargets(mFlags); } AK_FORCE_INLINE bool representsNonWordInfo() const { return getCodePointCount() > 0 && CharUtils::isInUnicodeSpace(getCodePoints()[0]); } // Parent node position AK_FORCE_INLINE int getParentPos() const { return mParentPos; Loading native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp +10 −6 Original line number Diff line number Diff line Loading @@ -24,6 +24,7 @@ #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h" #include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/utils/probability_utils.h" #include "utils/char_utils.h" namespace latinime { Loading Loading @@ -318,12 +319,15 @@ int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNod PatriciaTrieReadingUtils::readPtNodeInfo(mDictRoot, ptNodePos, getShortcutsStructurePolicy(), getBigramsStructurePolicy(), &flags, &mergedNodeCodePointCount, mergedNodeCodePoints, &probability, &childrenPos, &shortcutPos, &bigramPos, &siblingPos); // Skip PtNodes don't start with Unicode code point because they represent non-word information. 
if (CharUtils::isInUnicodeSpace(mergedNodeCodePoints[0])) { childDicNodes->pushLeavingChild(dicNode, ptNodePos, childrenPos, probability, PatriciaTrieReadingUtils::isTerminal(flags), PatriciaTrieReadingUtils::hasChildrenInFlags(flags), PatriciaTrieReadingUtils::isBlacklisted(flags) || PatriciaTrieReadingUtils::isNotAWord(flags), mergedNodeCodePointCount, mergedNodeCodePoints); } return siblingPos; } Loading native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp +5 −1 Original line number Diff line number Diff line Loading @@ -59,13 +59,17 @@ void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const d // valid terminal DicNode. isTerminal = ptNodeParams.getProbability() != NOT_A_PROBABILITY; } readingHelper.readNextSiblingNode(ptNodeParams); if (!ptNodeParams.representsNonWordInfo()) { // Skip PtNodes that represent non-word information. continue; } childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getHeadPos(), ptNodeParams.getChildrenPos(), ptNodeParams.getProbability(), isTerminal, ptNodeParams.hasChildren(), ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord() /* isBlacklistedOrNotAWord */, ptNodeParams.getCodePointCount(), ptNodeParams.getCodePoints()); readingHelper.readNextSiblingNode(ptNodeParams); } if (readingHelper.isError()) { mIsCorrupted = true; Loading native/jni/src/utils/char_utils.cpp +3 −0 Original line number Diff line number Diff line Loading @@ -22,6 +22,9 @@ namespace latinime { const int CharUtils::MIN_UNICODE_CODE_POINT = 0; const int CharUtils::MAX_UNICODE_CODE_POINT = 0x10FFFF; struct LatinCapitalSmallPair { unsigned short capital; unsigned short small; Loading native/jni/src/utils/char_utils.h +7 −0 Original line number Diff line number Diff line Loading @@ -86,12 +86,19 @@ class CharUtils { return spaceCount; } static AK_FORCE_INLINE int isInUnicodeSpace(const int codePoint) { return codePoint >= MIN_UNICODE_CODE_POINT && codePoint <= MAX_UNICODE_CODE_POINT; } static unsigned 
short latin_tolower(const unsigned short c); static const std::vector<int> EMPTY_STRING; private: DISALLOW_IMPLICIT_CONSTRUCTORS(CharUtils); static const int MIN_UNICODE_CODE_POINT; static const int MAX_UNICODE_CODE_POINT; /** * Table mapping most combined Latin, Greek, and Cyrillic characters * to their base characters. If c is in range, BASE_CHARS[c] == c Loading Loading
native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h +5 −0 Original line number Diff line number Diff line Loading @@ -23,6 +23,7 @@ #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h" #include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" #include "utils/char_utils.h" namespace latinime { Loading Loading @@ -158,6 +159,10 @@ class PtNodeParams { return PatriciaTrieReadingUtils::hasShortcutTargets(mFlags); } /* Returns true when this PtNode has at least one code point and its first code point passes CharUtils::isInUnicodeSpace(). NOTE(review): the name reads as the opposite of the value returned (a first code point inside the Unicode range yields true); the visible v4 caller negates the result before skipping a node -- confirm the intended polarity before renaming or changing either side. */ AK_FORCE_INLINE bool representsNonWordInfo() const { return getCodePointCount() > 0 && CharUtils::isInUnicodeSpace(getCodePoints()[0]); } // Parent node position AK_FORCE_INLINE int getParentPos() const { return mParentPos; Loading
native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp +10 −6 Original line number Diff line number Diff line Loading @@ -24,6 +24,7 @@ #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h" #include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/utils/probability_utils.h" #include "utils/char_utils.h" namespace latinime { Loading Loading @@ -318,12 +319,15 @@ int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNod PatriciaTrieReadingUtils::readPtNodeInfo(mDictRoot, ptNodePos, getShortcutsStructurePolicy(), getBigramsStructurePolicy(), &flags, &mergedNodeCodePointCount, mergedNodeCodePoints, &probability, &childrenPos, &shortcutPos, &bigramPos, &siblingPos); /* NOTE(review): element 0 of mergedNodeCodePoints is read below without consulting mergedNodeCodePointCount; confirm readPtNodeInfo() always produces at least one code point. */ // Skip PtNodes that don't start with a Unicode code point because they represent non-word information. if (CharUtils::isInUnicodeSpace(mergedNodeCodePoints[0])) { childDicNodes->pushLeavingChild(dicNode, ptNodePos, childrenPos, probability, PatriciaTrieReadingUtils::isTerminal(flags), PatriciaTrieReadingUtils::hasChildrenInFlags(flags), PatriciaTrieReadingUtils::isBlacklisted(flags) || PatriciaTrieReadingUtils::isNotAWord(flags), mergedNodeCodePointCount, mergedNodeCodePoints); } return siblingPos; } Loading
native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp +5 −1 Original line number Diff line number Diff line Loading @@ -59,13 +59,17 @@ void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const d // valid terminal DicNode. isTerminal = ptNodeParams.getProbability() != NOT_A_PROBABILITY; } readingHelper.readNextSiblingNode(ptNodeParams); /* NOTE(review): the check below is negated, so a node is skipped when representsNonWordInfo() returns false; verify that helper's return polarity against its name and against the comment inside the branch. */ if (!ptNodeParams.representsNonWordInfo()) { // Skip PtNodes that represent non-word information. continue; } childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getHeadPos(), ptNodeParams.getChildrenPos(), ptNodeParams.getProbability(), isTerminal, ptNodeParams.hasChildren(), ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord() /* isBlacklistedOrNotAWord */, ptNodeParams.getCodePointCount(), ptNodeParams.getCodePoints()); readingHelper.readNextSiblingNode(ptNodeParams); } if (readingHelper.isError()) { mIsCorrupted = true; Loading
native/jni/src/utils/char_utils.cpp +3 −0 Original line number Diff line number Diff line Loading @@ -22,6 +22,9 @@ namespace latinime { /* Lower and upper bounds of the code point range; CharUtils::isInUnicodeSpace() compares against both inclusively. */ const int CharUtils::MIN_UNICODE_CODE_POINT = 0; const int CharUtils::MAX_UNICODE_CODE_POINT = 0x10FFFF; struct LatinCapitalSmallPair { unsigned short capital; unsigned short small; Loading
native/jni/src/utils/char_utils.h +7 −0 Original line number Diff line number Diff line Loading @@ -86,12 +86,19 @@ class CharUtils { return spaceCount; } /* Returns true when codePoint lies within [MIN_UNICODE_CODE_POINT, MAX_UNICODE_CODE_POINT], both ends inclusive. Declared bool because the result is a pure predicate; callers that use it in conditions or store it in an int keep working through the implicit bool-to-int conversion. */ static AK_FORCE_INLINE bool isInUnicodeSpace(const int codePoint) { return codePoint >= MIN_UNICODE_CODE_POINT && codePoint <= MAX_UNICODE_CODE_POINT; } static unsigned short latin_tolower(const unsigned short c); static const std::vector<int> EMPTY_STRING; private: DISALLOW_IMPLICIT_CONSTRUCTORS(CharUtils); static const int MIN_UNICODE_CODE_POINT; static const int MAX_UNICODE_CODE_POINT; /** * Table mapping most combined Latin, Greek, and Cyrillic characters * to their base characters. If c is in range, BASE_CHARS[c] == c Loading