Loading native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp +17 −7 Original line number Diff line number Diff line Loading @@ -19,34 +19,44 @@ #include "suggest/core/policy/dictionary_bigrams_structure_policy.h" #include "suggest/core/policy/dictionary_shortcuts_structure_policy.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/utils/extendable_buffer.h" namespace latinime { void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(const int nodePos, const int maxCodePointCount, int *const outCodePoints) { int pos = nodePos; mFlags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos); const bool usesAdditionalBuffer = nodePos >= mOriginalDictSize; const uint8_t *const dictBuf = usesAdditionalBuffer ? mExtendableBuffer->getBuffer() : mDictRoot; int pos = (usesAdditionalBuffer) ? nodePos - mOriginalDictSize : nodePos; mFlags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos); const int parentPos = DynamicPatriciaTrieReadingUtils::getParentPosAndAdvancePosition(mDictRoot, &pos); DynamicPatriciaTrieReadingUtils::getParentPosAndAdvancePosition(dictBuf, &pos); mParentPos = (parentPos != 0) ? mNodePos + parentPos : NOT_A_DICT_POS; if (outCodePoints != 0) { mCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition( mDictRoot, mFlags, maxCodePointCount, outCodePoints, &pos); dictBuf, mFlags, maxCodePointCount, outCodePoints, &pos); } else { mCodePointCount = PatriciaTrieReadingUtils::skipCharacters( mDictRoot, mFlags, MAX_WORD_LENGTH, &pos); dictBuf, mFlags, MAX_WORD_LENGTH, &pos); } if (isTerminal()) { mProbability = PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos); mProbability = PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictBuf, &pos); } else { mProbability = NOT_A_PROBABILITY; } if (hasChildren()) { mChildrenPos = DynamicPatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition( mDictRoot, mFlags, &pos); dictBuf, mFlags, &pos); if (usesAdditionalBuffer && mChildrenPos != NOT_A_DICT_POS) { mChildrenPos += mOriginalDictSize; } } else { mChildrenPos = NOT_A_DICT_POS; } if (usesAdditionalBuffer) { pos += mOriginalDictSize; } if (PatriciaTrieReadingUtils::hasShortcutTargets(mFlags)) { mShortcutPos = pos; mShortcutsPolicy->skipAllShortcuts(&pos); Loading native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h +8 −3 Original line number Diff line number Diff line Loading @@ -27,6 +27,7 @@ namespace latinime { class DictionaryBigramsStructurePolicy; class DictionaryShortcutsStructurePolicy; class ExtendableBuffer; /* * This class is used for helping to read nodes of dynamic patricia trie. This class handles moved Loading @@ -34,10 +35,12 @@ class DictionaryShortcutsStructurePolicy; */ class DynamicPatriciaTrieNodeReader { public: DynamicPatriciaTrieNodeReader(const uint8_t *const dictRoot, DynamicPatriciaTrieNodeReader(const uint8_t *const dictRoot, const int originalDictSize, const ExtendableBuffer *const extendableBuffer, const DictionaryBigramsStructurePolicy *const bigramsPolicy, const DictionaryShortcutsStructurePolicy *const shortcutsPolicy) : mDictRoot(dictRoot), mBigramsPolicy(bigramsPolicy), : mDictRoot(dictRoot), mOriginalDictSize(originalDictSize), mExtendableBuffer(extendableBuffer), mBigramsPolicy(bigramsPolicy), mShortcutsPolicy(shortcutsPolicy), mNodePos(NOT_A_VALID_WORD_POS), mFlags(0), mParentPos(NOT_A_DICT_POS), mCodePointCount(0), mProbability(NOT_A_PROBABILITY), mChildrenPos(NOT_A_DICT_POS), mShortcutPos(NOT_A_DICT_POS), Loading Loading @@ -123,6 +126,8 @@ class DynamicPatriciaTrieNodeReader { // TODO: Consolidate mDictRoot. const uint8_t *const mDictRoot; const int mOriginalDictSize; const ExtendableBuffer *const mExtendableBuffer; const DictionaryBigramsStructurePolicy *const mBigramsPolicy; const DictionaryShortcutsStructurePolicy *const mShortcutsPolicy; int mNodePos; Loading native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp +12 −12 Original line number Diff line number Diff line Loading @@ -33,8 +33,8 @@ void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const d if (!dicNode->hasChildren()) { return; } DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(), getShortcutsStructurePolicy()); DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, mOriginalDictSize, &mExtendableBuffer, getBigramsStructurePolicy(), getShortcutsStructurePolicy()); int mergedNodeCodePoints[MAX_WORD_LENGTH]; int nextPos = dicNode->getChildrenPos(); int totalChildCount = 0; Loading Loading @@ -79,8 +79,8 @@ int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCoun int mergedNodeCodePoints[maxCodePointCount]; int codePointCount = 0; DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(), getShortcutsStructurePolicy()); DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, mOriginalDictSize, &mExtendableBuffer, getBigramsStructurePolicy(), getShortcutsStructurePolicy()); // First, read terminal node and get its probability. nodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(nodePos, maxCodePointCount, mergedNodeCodePoints); Loading Loading @@ -124,8 +124,8 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const in int mergedNodeCodePoints[MAX_WORD_LENGTH]; int currentLength = 0; int pos = getRootPosition(); DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(), getShortcutsStructurePolicy()); DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, mOriginalDictSize, &mExtendableBuffer, getBigramsStructurePolicy(), getShortcutsStructurePolicy()); while (currentLength < length) { // When foundMatchedNode becomes true, currentLength is increased at least once. bool foundMatchedNode = false; Loading Loading @@ -198,8 +198,8 @@ int DynamicPatriciaTriePolicy::getUnigramProbability(const int nodePos) const { if (nodePos == NOT_A_VALID_WORD_POS) { return NOT_A_PROBABILITY; } DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(), getShortcutsStructurePolicy()); DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, mOriginalDictSize, &mExtendableBuffer, getBigramsStructurePolicy(), getShortcutsStructurePolicy()); nodeReader.fetchNodeInfoFromBuffer(nodePos); if (nodeReader.isDeleted() || nodeReader.isBlacklisted() || nodeReader.isNotAWord()) { return NOT_A_PROBABILITY; Loading @@ -211,8 +211,8 @@ int DynamicPatriciaTriePolicy::getShortcutPositionOfNode(const int nodePos) cons if (nodePos == NOT_A_VALID_WORD_POS) { return NOT_A_DICT_POS; } DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(), getShortcutsStructurePolicy()); DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, mOriginalDictSize, &mExtendableBuffer, getBigramsStructurePolicy(), getShortcutsStructurePolicy()); nodeReader.fetchNodeInfoFromBuffer(nodePos); if (nodeReader.isDeleted()) { return NOT_A_DICT_POS; Loading @@ -224,8 +224,8 @@ int DynamicPatriciaTriePolicy::getBigramsPositionOfNode(const int nodePos) const if (nodePos == NOT_A_VALID_WORD_POS) { return NOT_A_DICT_POS; } DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(), getShortcutsStructurePolicy()); DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, mOriginalDictSize, &mExtendableBuffer, getBigramsStructurePolicy(), getShortcutsStructurePolicy()); nodeReader.fetchNodeInfoFromBuffer(nodePos); if (nodeReader.isDeleted()) { return NOT_A_DICT_POS; Loading native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h +8 −5 Original line number Diff line number Diff line Loading @@ -21,9 +21,9 @@ #include "defines.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" #include "suggest/policyimpl/dictionary/bigram/bigram_list_policy.h" #include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h" #include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h" #include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h" #include "suggest/policyimpl/dictionary/utils/extendable_buffer.h" #include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" Loading @@ -37,7 +37,9 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { DynamicPatriciaTriePolicy(const MmappedBuffer *const buffer) : mBuffer(buffer), mExtendableBuffer(), mHeaderPolicy(mBuffer->getBuffer()), mDictRoot(mBuffer->getBuffer() + mHeaderPolicy.getSize()), mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot) {} mOriginalDictSize(mBuffer->getBufferSize() - mHeaderPolicy.getSize()), mBigramListPolicy(mDictRoot, mOriginalDictSize, &mExtendableBuffer), mShortcutListPolicy(mDictRoot, mOriginalDictSize, &mExtendableBuffer) {} ~DynamicPatriciaTriePolicy() { delete mBuffer; Loading Loading @@ -93,8 +95,9 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { // TODO: Consolidate mDictRoot. // CAVEAT!: Be careful about array out of bound access with mDictRoot const uint8_t *const mDictRoot; const BigramListPolicy mBigramListPolicy; const ShortcutListPolicy mShortcutListPolicy; const int mOriginalDictSize; const DynamicBigramListPolicy mBigramListPolicy; const DynamicShortcutListPolicy mShortcutListPolicy; }; } // namespace latinime #endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H Loading
native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp +17 −7 Original line number Diff line number Diff line Loading @@ -19,34 +19,44 @@ #include "suggest/core/policy/dictionary_bigrams_structure_policy.h" #include "suggest/core/policy/dictionary_shortcuts_structure_policy.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/utils/extendable_buffer.h" namespace latinime { void DynamicPatriciaTrieNodeReader::fetchNodeInfoFromBufferAndProcessMovedNode(const int nodePos, const int maxCodePointCount, int *const outCodePoints) { int pos = nodePos; mFlags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos); const bool usesAdditionalBuffer = nodePos >= mOriginalDictSize; const uint8_t *const dictBuf = usesAdditionalBuffer ? mExtendableBuffer->getBuffer() : mDictRoot; int pos = (usesAdditionalBuffer) ? nodePos - mOriginalDictSize : nodePos; mFlags = PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos); const int parentPos = DynamicPatriciaTrieReadingUtils::getParentPosAndAdvancePosition(mDictRoot, &pos); DynamicPatriciaTrieReadingUtils::getParentPosAndAdvancePosition(dictBuf, &pos); mParentPos = (parentPos != 0) ? mNodePos + parentPos : NOT_A_DICT_POS; if (outCodePoints != 0) { mCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition( mDictRoot, mFlags, maxCodePointCount, outCodePoints, &pos); dictBuf, mFlags, maxCodePointCount, outCodePoints, &pos); } else { mCodePointCount = PatriciaTrieReadingUtils::skipCharacters( mDictRoot, mFlags, MAX_WORD_LENGTH, &pos); dictBuf, mFlags, MAX_WORD_LENGTH, &pos); } if (isTerminal()) { mProbability = PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos); mProbability = PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(dictBuf, &pos); } else { mProbability = NOT_A_PROBABILITY; } if (hasChildren()) { mChildrenPos = DynamicPatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition( mDictRoot, mFlags, &pos); dictBuf, mFlags, &pos); if (usesAdditionalBuffer && mChildrenPos != NOT_A_DICT_POS) { mChildrenPos += mOriginalDictSize; } } else { mChildrenPos = NOT_A_DICT_POS; } if (usesAdditionalBuffer) { pos += mOriginalDictSize; } if (PatriciaTrieReadingUtils::hasShortcutTargets(mFlags)) { mShortcutPos = pos; mShortcutsPolicy->skipAllShortcuts(&pos); Loading
native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h +8 −3 Original line number Diff line number Diff line Loading @@ -27,6 +27,7 @@ namespace latinime { class DictionaryBigramsStructurePolicy; class DictionaryShortcutsStructurePolicy; class ExtendableBuffer; /* * This class is used for helping to read nodes of dynamic patricia trie. This class handles moved Loading @@ -34,10 +35,12 @@ class DictionaryShortcutsStructurePolicy; */ class DynamicPatriciaTrieNodeReader { public: DynamicPatriciaTrieNodeReader(const uint8_t *const dictRoot, DynamicPatriciaTrieNodeReader(const uint8_t *const dictRoot, const int originalDictSize, const ExtendableBuffer *const extendableBuffer, const DictionaryBigramsStructurePolicy *const bigramsPolicy, const DictionaryShortcutsStructurePolicy *const shortcutsPolicy) : mDictRoot(dictRoot), mBigramsPolicy(bigramsPolicy), : mDictRoot(dictRoot), mOriginalDictSize(originalDictSize), mExtendableBuffer(extendableBuffer), mBigramsPolicy(bigramsPolicy), mShortcutsPolicy(shortcutsPolicy), mNodePos(NOT_A_VALID_WORD_POS), mFlags(0), mParentPos(NOT_A_DICT_POS), mCodePointCount(0), mProbability(NOT_A_PROBABILITY), mChildrenPos(NOT_A_DICT_POS), mShortcutPos(NOT_A_DICT_POS), Loading Loading @@ -123,6 +126,8 @@ class DynamicPatriciaTrieNodeReader { // TODO: Consolidate mDictRoot. const uint8_t *const mDictRoot; const int mOriginalDictSize; const ExtendableBuffer *const mExtendableBuffer; const DictionaryBigramsStructurePolicy *const mBigramsPolicy; const DictionaryShortcutsStructurePolicy *const mShortcutsPolicy; int mNodePos; Loading
native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp +12 −12 Original line number Diff line number Diff line Loading @@ -33,8 +33,8 @@ void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const d if (!dicNode->hasChildren()) { return; } DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(), getShortcutsStructurePolicy()); DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, mOriginalDictSize, &mExtendableBuffer, getBigramsStructurePolicy(), getShortcutsStructurePolicy()); int mergedNodeCodePoints[MAX_WORD_LENGTH]; int nextPos = dicNode->getChildrenPos(); int totalChildCount = 0; Loading Loading @@ -79,8 +79,8 @@ int DynamicPatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCoun int mergedNodeCodePoints[maxCodePointCount]; int codePointCount = 0; DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(), getShortcutsStructurePolicy()); DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, mOriginalDictSize, &mExtendableBuffer, getBigramsStructurePolicy(), getShortcutsStructurePolicy()); // First, read terminal node and get its probability. nodeReader.fetchNodeInfoFromBufferAndGetNodeCodePoints(nodePos, maxCodePointCount, mergedNodeCodePoints); Loading Loading @@ -124,8 +124,8 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const in int mergedNodeCodePoints[MAX_WORD_LENGTH]; int currentLength = 0; int pos = getRootPosition(); DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(), getShortcutsStructurePolicy()); DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, mOriginalDictSize, &mExtendableBuffer, getBigramsStructurePolicy(), getShortcutsStructurePolicy()); while (currentLength < length) { // When foundMatchedNode becomes true, currentLength is increased at least once. bool foundMatchedNode = false; Loading Loading @@ -198,8 +198,8 @@ int DynamicPatriciaTriePolicy::getUnigramProbability(const int nodePos) const { if (nodePos == NOT_A_VALID_WORD_POS) { return NOT_A_PROBABILITY; } DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(), getShortcutsStructurePolicy()); DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, mOriginalDictSize, &mExtendableBuffer, getBigramsStructurePolicy(), getShortcutsStructurePolicy()); nodeReader.fetchNodeInfoFromBuffer(nodePos); if (nodeReader.isDeleted() || nodeReader.isBlacklisted() || nodeReader.isNotAWord()) { return NOT_A_PROBABILITY; Loading @@ -211,8 +211,8 @@ int DynamicPatriciaTriePolicy::getShortcutPositionOfNode(const int nodePos) cons if (nodePos == NOT_A_VALID_WORD_POS) { return NOT_A_DICT_POS; } DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(), getShortcutsStructurePolicy()); DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, mOriginalDictSize, &mExtendableBuffer, getBigramsStructurePolicy(), getShortcutsStructurePolicy()); nodeReader.fetchNodeInfoFromBuffer(nodePos); if (nodeReader.isDeleted()) { return NOT_A_DICT_POS; Loading @@ -224,8 +224,8 @@ int DynamicPatriciaTriePolicy::getBigramsPositionOfNode(const int nodePos) const if (nodePos == NOT_A_VALID_WORD_POS) { return NOT_A_DICT_POS; } DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, getBigramsStructurePolicy(), getShortcutsStructurePolicy()); DynamicPatriciaTrieNodeReader nodeReader(mDictRoot, mOriginalDictSize, &mExtendableBuffer, getBigramsStructurePolicy(), getShortcutsStructurePolicy()); nodeReader.fetchNodeInfoFromBuffer(nodePos); if (nodeReader.isDeleted()) { return NOT_A_DICT_POS; Loading
native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h +8 −5 Original line number Diff line number Diff line Loading @@ -21,9 +21,9 @@ #include "defines.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" #include "suggest/policyimpl/dictionary/bigram/bigram_list_policy.h" #include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h" #include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h" #include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h" #include "suggest/policyimpl/dictionary/utils/extendable_buffer.h" #include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" Loading @@ -37,7 +37,9 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { DynamicPatriciaTriePolicy(const MmappedBuffer *const buffer) : mBuffer(buffer), mExtendableBuffer(), mHeaderPolicy(mBuffer->getBuffer()), mDictRoot(mBuffer->getBuffer() + mHeaderPolicy.getSize()), mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot) {} mOriginalDictSize(mBuffer->getBufferSize() - mHeaderPolicy.getSize()), mBigramListPolicy(mDictRoot, mOriginalDictSize, &mExtendableBuffer), mShortcutListPolicy(mDictRoot, mOriginalDictSize, &mExtendableBuffer) {} ~DynamicPatriciaTriePolicy() { delete mBuffer; Loading Loading @@ -93,8 +95,9 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { // TODO: Consolidate mDictRoot. // CAVEAT!: Be careful about array out of bound access with mDictRoot const uint8_t *const mDictRoot; const BigramListPolicy mBigramListPolicy; const ShortcutListPolicy mShortcutListPolicy; const int mOriginalDictSize; const DynamicBigramListPolicy mBigramListPolicy; const DynamicShortcutListPolicy mShortcutListPolicy; }; } // namespace latinime #endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H