Loading native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp +98 −16 Original line number Diff line number Diff line Loading @@ -47,16 +47,18 @@ bool DynamicPatriciaTrieWritingHelper::addUnigramWord( const int nodeCodePointCount = nodeReader->getCodePointCount(); for (int j = 1; j < nodeCodePointCount; ++j) { const int nextIndex = matchedCodePointCount + j; if (nextIndex >= codePointCount) { // TODO: split current node after j - 1, create child and make this terminal. return false; } if (!readingHelper->isMatchedCodePoint(j, if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(j, wordCodePoints[matchedCodePointCount + j])) { // TODO: split current node after j - 1 and create two children. if (ENABLE_DYNAMIC_UPDATE) { return reallocatePtNodeAndAddNewPtNodes(nodeReader, readingHelper->getMergedNodeCodePoints(), j, probability, wordCodePoints + matchedCodePointCount, codePointCount - matchedCodePointCount); } else { return false; } } } // All characters are matched. if (codePointCount == readingHelper->getTotalCodePointCount()) { if (ENABLE_DYNAMIC_UPDATE) { Loading Loading @@ -145,8 +147,8 @@ bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition( return true; } // Write new node at writingPos. bool DynamicPatriciaTrieWritingHelper::writeNodeToBuffer(const bool isBlacklisted, // Write new PtNode at writingPos. bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(const bool isBlacklisted, const bool isNotAWord, const int parentPos, const int *const codePoints, const int codePointCount, const int probability, const int childrenPos, const int originalBigramListPos, const int originalShortcutListPos, Loading Loading @@ -203,6 +205,25 @@ bool DynamicPatriciaTrieWritingHelper::writeNodeToBuffer(const bool isBlackliste return true; } bool DynamicPatriciaTrieWritingHelper::writePtNodeToBuffer(const int parentPos, const int *const codePoints, const int codePointCount, const int probability, int *const writingPos) { return writePtNodeWithFullInfoToBuffer(false /* isBlacklisted */, false /* isNotAWord */, parentPos, codePoints, codePointCount, probability, NOT_A_DICT_POS /* childrenPos */, NOT_A_DICT_POS /* originalBigramsPos */, NOT_A_DICT_POS /* originalShortcutPos */, writingPos); } bool DynamicPatriciaTrieWritingHelper::writePtNodeToBufferByCopyingPtNodeInfo( const DynamicPatriciaTrieNodeReader *const originalNode, const int parentPos, const int *const codePoints, const int codePointCount, const int probability, int *const writingPos) { return writePtNodeWithFullInfoToBuffer(originalNode->isBlacklisted(), originalNode->isNotAWord(), parentPos, codePoints, codePointCount, probability, originalNode->getChildrenPos(), originalNode->getBigramsPos(), originalNode->getShortcutPos(), writingPos); } bool DynamicPatriciaTrieWritingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints, const int nodeCodePointCount, const int probability, int *const forwardLinkFieldPos) { Loading Loading @@ -231,10 +252,8 @@ bool DynamicPatriciaTrieWritingHelper::setPtNodeProbability( if (!markNodeAsMovedAndSetPosition(originalPtNode, movedPos)) { return false; } if (!writeNodeToBuffer(originalPtNode->isBlacklisted(), originalPtNode->isNotAWord(), originalPtNode->getParentPos(), codePoints, originalPtNode->getCodePointCount(), probability, originalPtNode->getChildrenPos(), originalPtNode->getBigramsPos(), originalPtNode->getShortcutPos(), &movedPos)) { if (!writePtNodeToBufferByCopyingPtNodeInfo(originalPtNode, originalPtNode->getParentPos(), codePoints, originalPtNode->getCodePointCount(), probability, &movedPos)) { return false; } } Loading Loading @@ -262,9 +281,7 @@ bool DynamicPatriciaTrieWritingHelper::createNewPtNodeArrayWithAChildPtNode( 1 /* arraySize */, &writingPos)) { return false; } if (!writeNodeToBuffer(false /* isBlacklisted */, false /* isNotAWord */, parentPtNodePos, nodeCodePoints, nodeCodePointCount, probability, NOT_A_DICT_POS /* childrenPos */, NOT_A_DICT_POS /* originalBigramsPos */, NOT_A_DICT_POS /* originalShortcutPos */, if (!writePtNodeToBuffer(parentPtNodePos, nodeCodePoints, nodeCodePointCount, probability, &writingPos)) { return false; } Loading @@ -275,4 +292,69 @@ bool DynamicPatriciaTrieWritingHelper::createNewPtNodeArrayWithAChildPtNode( return true; } // Returns whether the dictionary updating was succeeded or not. bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes( const DynamicPatriciaTrieNodeReader *const reallocatingPtNode, const int *const reallocatingPtNodeCodePoints, const int overlappingCodePointCount, const int probabilityOfNewPtNode, const int *const newNodeCodePoints, const int newNodeCodePointCount) { // When addsExtraChild is true, split the reallocating PtNode and add new child. // Reallocating PtNode: abcde, newNode: abcxy. // abc (1st, not terminal) __ de (2nd) // \_ xy (extra child, terminal) // Otherwise, this method makes 1st part terminal and write probabilityOfNewPtNode. // Reallocating PtNode: abcde, newNode: abc. // abc (1st, terminal) __ de (2nd) const bool addsExtraChild = newNodeCodePointCount > overlappingCodePointCount; const int firstPtNodePos = mBuffer->getTailPosition(); if (!markNodeAsMovedAndSetPosition(reallocatingPtNode, firstPtNodePos)) { return false; } int writingPos = firstPtNodePos; // Write the 1st part of the reallocating node. The children position will be updated later // with actual children position. const int newProbability = addsExtraChild ? NOT_A_PROBABILITY : probabilityOfNewPtNode; if (!writePtNodeToBuffer(reallocatingPtNode->getParentPos(), reallocatingPtNodeCodePoints, overlappingCodePointCount, newProbability, &writingPos)) { return false; } const int actualChildrenPos = writingPos; // Create new children PtNode array. const size_t newPtNodeCount = addsExtraChild ? 2 : 1; if (!DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer, newPtNodeCount, &writingPos)) { return false; } // Write the 2nd part of the reallocating node. if (!writePtNodeToBufferByCopyingPtNodeInfo(reallocatingPtNode, reallocatingPtNode->getNodePos(), reallocatingPtNodeCodePoints + overlappingCodePointCount, reallocatingPtNode->getCodePointCount() - overlappingCodePointCount, reallocatingPtNode->getProbability(), &writingPos)) { return false; } if (addsExtraChild) { if (!writePtNodeToBuffer(reallocatingPtNode->getNodePos(), newNodeCodePoints + overlappingCodePointCount, newNodeCodePointCount - overlappingCodePointCount, probabilityOfNewPtNode, &writingPos)) { return false; } } if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer, NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) { return false; } // Load node info. Information of the 1st part will be fetched. DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy); nodeReader.fetchNodeInfoFromBuffer(firstPtNodePos); // Update children position. int childrenPosFieldPos = nodeReader.getChildrenPosFieldPos(); if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer, actualChildrenPos, &childrenPosFieldPos)) { return false; } return true; } } // namespace latinime native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h +17 −3 Original line number Diff line number Diff line Loading @@ -57,11 +57,19 @@ class DynamicPatriciaTrieWritingHelper { bool markNodeAsMovedAndSetPosition(const DynamicPatriciaTrieNodeReader *const nodeToUpdate, const int movedPos); bool writeNodeToBuffer(const bool isBlacklisted, const bool isNotAWord, const int parentPos, const int *const codePoints, const int codePointCount, const int probability, const int childrenPos, const int originalBigramListPos, bool writePtNodeWithFullInfoToBuffer(const bool isBlacklisted, const bool isNotAWord, const int parentPos, const int *const codePoints, const int codePointCount, const int probability, const int childrenPos, const int originalBigramListPos, const int originalShortcutListPos, int *const writingPos); bool writePtNodeToBuffer(const int parentPos, const int *const codePoints, const int codePointCount, const int probability, int *const writingPos); bool writePtNodeToBufferByCopyingPtNodeInfo( const DynamicPatriciaTrieNodeReader *const originalNode, const int parentPos, const int *const codePoints, const int codePointCount, const int probability, int *const writingPos); bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints, const int nodeCodePointCount, const int probability, int *const forwardLinkFieldPos); Loading @@ -74,6 +82,12 @@ class DynamicPatriciaTrieWritingHelper { bool createNewPtNodeArrayWithAChildPtNode(const int parentPos, const int *const nodeCodePoints, const int nodeCodePointCount, const int probability); bool reallocatePtNodeAndAddNewPtNodes( const DynamicPatriciaTrieNodeReader *const reallocatingPtNode, const int *const reallocatingPtNodeCodePoints, const int overlappingCodePointCount, const int probabilityOfNewPtNode, const int *const newNodeCodePoints, const int newNodeCodePointCount); }; } // namespace latinime #endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H */ Loading
native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp +98 −16 Original line number Diff line number Diff line Loading @@ -47,16 +47,18 @@ bool DynamicPatriciaTrieWritingHelper::addUnigramWord( const int nodeCodePointCount = nodeReader->getCodePointCount(); for (int j = 1; j < nodeCodePointCount; ++j) { const int nextIndex = matchedCodePointCount + j; if (nextIndex >= codePointCount) { // TODO: split current node after j - 1, create child and make this terminal. return false; } if (!readingHelper->isMatchedCodePoint(j, if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(j, wordCodePoints[matchedCodePointCount + j])) { // TODO: split current node after j - 1 and create two children. if (ENABLE_DYNAMIC_UPDATE) { return reallocatePtNodeAndAddNewPtNodes(nodeReader, readingHelper->getMergedNodeCodePoints(), j, probability, wordCodePoints + matchedCodePointCount, codePointCount - matchedCodePointCount); } else { return false; } } } // All characters are matched. if (codePointCount == readingHelper->getTotalCodePointCount()) { if (ENABLE_DYNAMIC_UPDATE) { Loading Loading @@ -145,8 +147,8 @@ bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition( return true; } // Write new node at writingPos. bool DynamicPatriciaTrieWritingHelper::writeNodeToBuffer(const bool isBlacklisted, // Write new PtNode at writingPos. bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(const bool isBlacklisted, const bool isNotAWord, const int parentPos, const int *const codePoints, const int codePointCount, const int probability, const int childrenPos, const int originalBigramListPos, const int originalShortcutListPos, Loading Loading @@ -203,6 +205,25 @@ bool DynamicPatriciaTrieWritingHelper::writeNodeToBuffer(const bool isBlackliste return true; } bool DynamicPatriciaTrieWritingHelper::writePtNodeToBuffer(const int parentPos, const int *const codePoints, const int codePointCount, const int probability, int *const writingPos) { return writePtNodeWithFullInfoToBuffer(false /* isBlacklisted */, false /* isNotAWord */, parentPos, codePoints, codePointCount, probability, NOT_A_DICT_POS /* childrenPos */, NOT_A_DICT_POS /* originalBigramsPos */, NOT_A_DICT_POS /* originalShortcutPos */, writingPos); } bool DynamicPatriciaTrieWritingHelper::writePtNodeToBufferByCopyingPtNodeInfo( const DynamicPatriciaTrieNodeReader *const originalNode, const int parentPos, const int *const codePoints, const int codePointCount, const int probability, int *const writingPos) { return writePtNodeWithFullInfoToBuffer(originalNode->isBlacklisted(), originalNode->isNotAWord(), parentPos, codePoints, codePointCount, probability, originalNode->getChildrenPos(), originalNode->getBigramsPos(), originalNode->getShortcutPos(), writingPos); } bool DynamicPatriciaTrieWritingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints, const int nodeCodePointCount, const int probability, int *const forwardLinkFieldPos) { Loading Loading @@ -231,10 +252,8 @@ bool DynamicPatriciaTrieWritingHelper::setPtNodeProbability( if (!markNodeAsMovedAndSetPosition(originalPtNode, movedPos)) { return false; } if (!writeNodeToBuffer(originalPtNode->isBlacklisted(), originalPtNode->isNotAWord(), originalPtNode->getParentPos(), codePoints, originalPtNode->getCodePointCount(), probability, originalPtNode->getChildrenPos(), originalPtNode->getBigramsPos(), originalPtNode->getShortcutPos(), &movedPos)) { if (!writePtNodeToBufferByCopyingPtNodeInfo(originalPtNode, originalPtNode->getParentPos(), codePoints, originalPtNode->getCodePointCount(), probability, &movedPos)) { return false; } } Loading Loading @@ -262,9 +281,7 @@ bool DynamicPatriciaTrieWritingHelper::createNewPtNodeArrayWithAChildPtNode( 1 /* arraySize */, &writingPos)) { return false; } if (!writeNodeToBuffer(false /* isBlacklisted */, false /* isNotAWord */, parentPtNodePos, nodeCodePoints, nodeCodePointCount, probability, NOT_A_DICT_POS /* childrenPos */, NOT_A_DICT_POS /* originalBigramsPos */, NOT_A_DICT_POS /* originalShortcutPos */, if (!writePtNodeToBuffer(parentPtNodePos, nodeCodePoints, nodeCodePointCount, probability, &writingPos)) { return false; } Loading @@ -275,4 +292,69 @@ bool DynamicPatriciaTrieWritingHelper::createNewPtNodeArrayWithAChildPtNode( return true; } // Returns whether the dictionary updating was succeeded or not. bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes( const DynamicPatriciaTrieNodeReader *const reallocatingPtNode, const int *const reallocatingPtNodeCodePoints, const int overlappingCodePointCount, const int probabilityOfNewPtNode, const int *const newNodeCodePoints, const int newNodeCodePointCount) { // When addsExtraChild is true, split the reallocating PtNode and add new child. // Reallocating PtNode: abcde, newNode: abcxy. // abc (1st, not terminal) __ de (2nd) // \_ xy (extra child, terminal) // Otherwise, this method makes 1st part terminal and write probabilityOfNewPtNode. // Reallocating PtNode: abcde, newNode: abc. // abc (1st, terminal) __ de (2nd) const bool addsExtraChild = newNodeCodePointCount > overlappingCodePointCount; const int firstPtNodePos = mBuffer->getTailPosition(); if (!markNodeAsMovedAndSetPosition(reallocatingPtNode, firstPtNodePos)) { return false; } int writingPos = firstPtNodePos; // Write the 1st part of the reallocating node. The children position will be updated later // with actual children position. const int newProbability = addsExtraChild ? NOT_A_PROBABILITY : probabilityOfNewPtNode; if (!writePtNodeToBuffer(reallocatingPtNode->getParentPos(), reallocatingPtNodeCodePoints, overlappingCodePointCount, newProbability, &writingPos)) { return false; } const int actualChildrenPos = writingPos; // Create new children PtNode array. const size_t newPtNodeCount = addsExtraChild ? 2 : 1; if (!DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer, newPtNodeCount, &writingPos)) { return false; } // Write the 2nd part of the reallocating node. if (!writePtNodeToBufferByCopyingPtNodeInfo(reallocatingPtNode, reallocatingPtNode->getNodePos(), reallocatingPtNodeCodePoints + overlappingCodePointCount, reallocatingPtNode->getCodePointCount() - overlappingCodePointCount, reallocatingPtNode->getProbability(), &writingPos)) { return false; } if (addsExtraChild) { if (!writePtNodeToBuffer(reallocatingPtNode->getNodePos(), newNodeCodePoints + overlappingCodePointCount, newNodeCodePointCount - overlappingCodePointCount, probabilityOfNewPtNode, &writingPos)) { return false; } } if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer, NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) { return false; } // Load node info. Information of the 1st part will be fetched. DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy); nodeReader.fetchNodeInfoFromBuffer(firstPtNodePos); // Update children position. int childrenPosFieldPos = nodeReader.getChildrenPosFieldPos(); if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer, actualChildrenPos, &childrenPosFieldPos)) { return false; } return true; } } // namespace latinime
native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h +17 −3 Original line number Diff line number Diff line Loading @@ -57,11 +57,19 @@ class DynamicPatriciaTrieWritingHelper { bool markNodeAsMovedAndSetPosition(const DynamicPatriciaTrieNodeReader *const nodeToUpdate, const int movedPos); bool writeNodeToBuffer(const bool isBlacklisted, const bool isNotAWord, const int parentPos, const int *const codePoints, const int codePointCount, const int probability, const int childrenPos, const int originalBigramListPos, bool writePtNodeWithFullInfoToBuffer(const bool isBlacklisted, const bool isNotAWord, const int parentPos, const int *const codePoints, const int codePointCount, const int probability, const int childrenPos, const int originalBigramListPos, const int originalShortcutListPos, int *const writingPos); bool writePtNodeToBuffer(const int parentPos, const int *const codePoints, const int codePointCount, const int probability, int *const writingPos); bool writePtNodeToBufferByCopyingPtNodeInfo( const DynamicPatriciaTrieNodeReader *const originalNode, const int parentPos, const int *const codePoints, const int codePointCount, const int probability, int *const writingPos); bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints, const int nodeCodePointCount, const int probability, int *const forwardLinkFieldPos); Loading @@ -74,6 +82,12 @@ class DynamicPatriciaTrieWritingHelper { bool createNewPtNodeArrayWithAChildPtNode(const int parentPos, const int *const nodeCodePoints, const int nodeCodePointCount, const int probability); bool reallocatePtNodeAndAddNewPtNodes( const DynamicPatriciaTrieNodeReader *const reallocatingPtNode, const int *const reallocatingPtNodeCodePoints, const int overlappingCodePointCount, const int probabilityOfNewPtNode, const int *const newNodeCodePoints, const int newNodeCodePointCount); }; } // namespace latinime #endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H */