Loading native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp +41 −44 Original line number Diff line number Diff line Loading @@ -16,6 +16,7 @@ #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h" #include "suggest/core/dictionary/property/unigram_property.h" #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h" #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h" #include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h" Loading @@ -29,9 +30,8 @@ const int DynamicPtUpdatingHelper::CHILDREN_POSITION_FIELD_SIZE = 3; bool DynamicPtUpdatingHelper::addUnigramWord( DynamicPtReadingHelper *const readingHelper, const int *const wordCodePoints, const int codePointCount, const int probability, const bool isNotAWord, const bool isBlacklisted, const int timestamp, bool *const outAddedNewUnigram) { const int *const wordCodePoints, const int codePointCount, const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram) { int parentPos = NOT_A_DICT_POS; while (!readingHelper->isEnd()) { const PtNodeParams ptNodeParams(readingHelper->getPtNodeParams()); Loading @@ -53,20 +53,18 @@ bool DynamicPtUpdatingHelper::addUnigramWord( if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(ptNodeParams, j, wordCodePoints[matchedCodePointCount + j])) { *outAddedNewUnigram = true; return reallocatePtNodeAndAddNewPtNodes(&ptNodeParams, j, isNotAWord, isBlacklisted, probability, timestamp, wordCodePoints + matchedCodePointCount, return reallocatePtNodeAndAddNewPtNodes(&ptNodeParams, j, unigramProperty, wordCodePoints + matchedCodePointCount, codePointCount - matchedCodePointCount); } } // All characters are matched. if (codePointCount == readingHelper->getTotalCodePointCount(ptNodeParams)) { return setPtNodeProbability(&ptNodeParams, isNotAWord, isBlacklisted, probability, timestamp, outAddedNewUnigram); return setPtNodeProbability(&ptNodeParams, unigramProperty, outAddedNewUnigram); } if (!ptNodeParams.hasChildren()) { *outAddedNewUnigram = true; return createChildrenPtNodeArrayAndAChildPtNode(&ptNodeParams, isNotAWord, isBlacklisted, probability, timestamp, return createChildrenPtNodeArrayAndAChildPtNode(&ptNodeParams, unigramProperty, wordCodePoints + readingHelper->getTotalCodePointCount(ptNodeParams), codePointCount - readingHelper->getTotalCodePointCount(ptNodeParams)); } Loading @@ -83,7 +81,7 @@ bool DynamicPtUpdatingHelper::addUnigramWord( return createAndInsertNodeIntoPtNodeArray(parentPos, wordCodePoints + readingHelper->getPrevTotalCodePointCount(), codePointCount - readingHelper->getPrevTotalCodePointCount(), isNotAWord, isBlacklisted, probability, timestamp, &pos); unigramProperty, &pos); } bool DynamicPtUpdatingHelper::addBigramWords(const int word0Pos, const int word1Pos, Loading Loading @@ -115,36 +113,34 @@ bool DynamicPtUpdatingHelper::addShortcutTarget(const int wordPos, bool DynamicPtUpdatingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints, const int nodeCodePointCount, const bool isNotAWord, const bool isBlacklisted, const int probability, const int timestamp, int *const forwardLinkFieldPos) { const UnigramProperty *const unigramProperty, int *const forwardLinkFieldPos) { const int newPtNodeArrayPos = mBuffer->getTailPosition(); if (!DynamicPtWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer, newPtNodeArrayPos, forwardLinkFieldPos)) { return false; } return createNewPtNodeArrayWithAChildPtNode(parentPos, nodeCodePoints, nodeCodePointCount, isNotAWord, isBlacklisted, probability, timestamp); unigramProperty); } bool DynamicPtUpdatingHelper::setPtNodeProbability( const PtNodeParams *const originalPtNodeParams, const bool isNotAWord, const bool isBlacklisted, const int probability, const int timestamp, bool *const outAddedNewUnigram) { bool DynamicPtUpdatingHelper::setPtNodeProbability(const PtNodeParams *const originalPtNodeParams, const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram) { if (originalPtNodeParams->isTerminal()) { // Overwrites the probability. *outAddedNewUnigram = false; return mPtNodeWriter->updatePtNodeProbability(originalPtNodeParams, probability, timestamp); return mPtNodeWriter->updatePtNodeUnigramProperty(originalPtNodeParams, unigramProperty); } else { // Make the node terminal and write the probability. *outAddedNewUnigram = true; const int movedPos = mBuffer->getTailPosition(); int writingPos = movedPos; const PtNodeParams ptNodeParamsToWrite(getUpdatedPtNodeParams(originalPtNodeParams, isNotAWord, isBlacklisted, true /* isTerminal */, originalPtNodeParams->getParentPos(), originalPtNodeParams->getCodePointCount(), originalPtNodeParams->getCodePoints(), probability)); unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(), true /* isTerminal */, originalPtNodeParams->getParentPos(), originalPtNodeParams->getCodePointCount(), originalPtNodeParams->getCodePoints(), unigramProperty->getProbability())); if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite, timestamp, &writingPos)) { unigramProperty, &writingPos)) { return false; } if (!mPtNodeWriter->markPtNodeAsMoved(originalPtNodeParams, movedPos, movedPos)) { Loading @@ -155,31 +151,30 @@ bool DynamicPtUpdatingHelper::setPtNodeProbability( } bool DynamicPtUpdatingHelper::createChildrenPtNodeArrayAndAChildPtNode( const PtNodeParams *const parentPtNodeParams, const bool isNotAWord, const bool isBlacklisted, const int probability, const int timestamp, const PtNodeParams *const parentPtNodeParams, const UnigramProperty *const unigramProperty, const int *const codePoints, const int codePointCount) { const int newPtNodeArrayPos = mBuffer->getTailPosition(); if (!mPtNodeWriter->updateChildrenPosition(parentPtNodeParams, newPtNodeArrayPos)) { return false; } return createNewPtNodeArrayWithAChildPtNode(parentPtNodeParams->getHeadPos(), codePoints, codePointCount, isNotAWord, isBlacklisted, probability, timestamp); codePointCount, unigramProperty); } bool DynamicPtUpdatingHelper::createNewPtNodeArrayWithAChildPtNode( const int parentPtNodePos, const int *const nodeCodePoints, const int nodeCodePointCount, const bool isNotAWord, const bool isBlacklisted, const int probability, const int timestamp) { const UnigramProperty *const unigramProperty) { int writingPos = mBuffer->getTailPosition(); if (!DynamicPtWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer, 1 /* arraySize */, &writingPos)) { return false; } const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode( isNotAWord, isBlacklisted, true /* isTerminal */, parentPtNodePos, nodeCodePointCount, nodeCodePoints, probability)); if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite, timestamp, &writingPos)) { unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(), true /* isTerminal */, parentPtNodePos, nodeCodePointCount, nodeCodePoints, unigramProperty->getProbability())); if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite, unigramProperty, &writingPos)) { return false; } if (!DynamicPtWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer, Loading @@ -192,13 +187,13 @@ bool DynamicPtUpdatingHelper::createNewPtNodeArrayWithAChildPtNode( // Returns whether the dictionary updating was succeeded or not. bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes( const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount, const bool isNotAWord, const bool isBlacklisted, const int probabilityOfNewPtNode, const int timestamp, const int *const newNodeCodePoints, const int newNodeCodePointCount) { const UnigramProperty *const unigramProperty, const int *const newNodeCodePoints, const int newNodeCodePointCount) { // When addsExtraChild is true, split the reallocating PtNode and add new child. // Reallocating PtNode: abcde, newNode: abcxy. // abc (1st, not terminal) __ de (2nd) // \_ xy (extra child, terminal) // Otherwise, this method makes 1st part terminal and write probabilityOfNewPtNode. // Otherwise, this method makes 1st part terminal and write information in unigramProperty. // Reallocating PtNode: abcde, newNode: abc. // abc (1st, terminal) __ de (2nd) const bool addsExtraChild = newNodeCodePointCount > overlappingCodePointCount; Loading @@ -216,11 +211,12 @@ bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes( } } else { const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode( isNotAWord, isBlacklisted, true /* isTerminal */, reallocatingPtNodeParams->getParentPos(), overlappingCodePointCount, reallocatingPtNodeParams->getCodePoints(), probabilityOfNewPtNode)); unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(), true /* isTerminal */, reallocatingPtNodeParams->getParentPos(), overlappingCodePointCount, reallocatingPtNodeParams->getCodePoints(), unigramProperty->getProbability())); if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite, timestamp, &writingPos)) { unigramProperty, &writingPos)) { return false; } } Loading @@ -244,11 +240,12 @@ bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes( } if (addsExtraChild) { const PtNodeParams extraChildPtNodeParams(getPtNodeParamsForNewPtNode( isNotAWord, isBlacklisted, true /* isTerminal */, firstPartOfReallocatedPtNodePos, newNodeCodePointCount - overlappingCodePointCount, newNodeCodePoints + overlappingCodePointCount, probabilityOfNewPtNode)); unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(), true /* isTerminal */, firstPartOfReallocatedPtNodePos, newNodeCodePointCount - overlappingCodePointCount, newNodeCodePoints + overlappingCodePointCount, unigramProperty->getProbability())); if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&extraChildPtNodeParams, timestamp, &writingPos)) { unigramProperty, &writingPos)) { return false; } } Loading @@ -269,8 +266,8 @@ bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes( } const PtNodeParams DynamicPtUpdatingHelper::getUpdatedPtNodeParams( const PtNodeParams *const originalPtNodeParams, const bool isNotAWord, const bool isBlacklisted, const bool isTerminal, const int parentPos, const PtNodeParams *const originalPtNodeParams, const bool isNotAWord, const bool isBlacklisted, const bool isTerminal, const int parentPos, const int codePointCount, const int *const codePoints, const int probability) const { const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags( isBlacklisted, isNotAWord, isTerminal, originalPtNodeParams->hasShortcutTargets(), Loading native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h +11 −14 Original line number Diff line number Diff line Loading @@ -26,6 +26,7 @@ class BufferWithExtendableBuffer; class DynamicPtReadingHelper; class PtNodeReader; class PtNodeWriter; class UnigramProperty; class DynamicPtUpdatingHelper { public: Loading @@ -37,9 +38,8 @@ class DynamicPtUpdatingHelper { // Add a word to the dictionary. If the word already exists, update the probability. bool addUnigramWord(DynamicPtReadingHelper *const readingHelper, const int *const wordCodePoints, const int codePointCount, const int probability, const bool isNotAWord, const bool isBlacklisted, const int timestamp, bool *const outAddedNewUnigram); const int *const wordCodePoints, const int codePointCount, const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram); // Add a bigram relation from word0Pos to word1Pos. bool addBigramWords(const int word0Pos, const int word1Pos, const int probability, Loading @@ -62,25 +62,22 @@ class DynamicPtUpdatingHelper { PtNodeWriter *const mPtNodeWriter; bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints, const int nodeCodePointCount, const bool isNotAWord, const bool isBlacklisted, const int probability, const int timestamp, int *const forwardLinkFieldPos); const int nodeCodePointCount, const UnigramProperty *const unigramProperty, int *const forwardLinkFieldPos); bool setPtNodeProbability(const PtNodeParams *const originalPtNodeParams, const bool isNotAWord, const bool isBlacklisted, const int probability, const int timestamp, bool *const outAddedNewUnigram); bool setPtNodeProbability(const PtNodeParams *const originalPtNodeParams, const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram); bool createChildrenPtNodeArrayAndAChildPtNode(const PtNodeParams *const parentPtNodeParams, const bool isNotAWord, const bool isBlacklisted, const int probability, const int timestamp, const int *const codePoints, const int codePointCount); const UnigramProperty *const unigramProperty, const int *const codePoints, const int codePointCount); bool createNewPtNodeArrayWithAChildPtNode(const int parentPos, const int *const nodeCodePoints, const int nodeCodePointCount, const bool isNotAWord, const bool isBlacklisted, const int probability, const int timestamp); const int nodeCodePointCount, const UnigramProperty *const unigramProperty); bool reallocatePtNodeAndAddNewPtNodes( const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount, const bool isNotAWord, const bool isBlacklisted, const int probabilityOfNewPtNode, const int timestamp, const int *const newNodeCodePoints, const UnigramProperty *const unigramProperty, const int *const newNodeCodePoints, const int newNodeCodePointCount); const PtNodeParams getUpdatedPtNodeParams(const PtNodeParams *const originalPtNodeParams, Loading native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h +5 −3 Original line number Diff line number Diff line Loading @@ -24,6 +24,8 @@ namespace latinime { class UnigramProperty; // Interface class used to write PtNode information. class PtNodeWriter { public: Loading Loading @@ -51,8 +53,8 @@ class PtNodeWriter { virtual bool markPtNodeAsWillBecomeNonTerminal( const PtNodeParams *const toBeUpdatedPtNodeParams) = 0; virtual bool updatePtNodeProbability(const PtNodeParams *const toBeUpdatedPtNodeParams, const int probability, const int timestamp) = 0; virtual bool updatePtNodeUnigramProperty(const PtNodeParams *const toBeUpdatedPtNodeParams, const UnigramProperty *const unigramProperty) = 0; virtual bool updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC( const PtNodeParams *const toBeUpdatedPtNodeParams, Loading @@ -65,7 +67,7 @@ class PtNodeWriter { int *const ptNodeWritingPos) = 0; virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams, const int timestamp, int *const ptNodeWritingPos) = 0; const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos) = 0; virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam, const int probability, const int timestamp, Loading native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp +16 −10 Original line number Diff line number Diff line Loading @@ -16,6 +16,7 @@ #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h" #include "suggest/core/dictionary/property/unigram_property.h" #include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h" #include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h" Loading Loading @@ -133,9 +134,11 @@ bool Ver4PatriciaTrieNodeWriter::markPtNodeAsWillBecomeNonTerminal( &writingPos); } bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbability( const PtNodeParams *const toBeUpdatedPtNodeParams, const int newProbability, const int timestamp) { bool Ver4PatriciaTrieNodeWriter::updatePtNodeUnigramProperty( const PtNodeParams *const toBeUpdatedPtNodeParams, const UnigramProperty *const unigramProperty) { // Update probability and historical information. // TODO: Update other information in the unigram property. if (!toBeUpdatedPtNodeParams->isTerminal()) { return false; } Loading @@ -143,7 +146,7 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbability( mBuffers->getProbabilityDictContent()->getProbabilityEntry( toBeUpdatedPtNodeParams->getTerminalId()); const ProbabilityEntry probabilityEntry = createUpdatedEntryFrom(&originalProbabilityEntry, newProbability, timestamp); unigramProperty); return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry( toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry); } Loading Loading @@ -204,7 +207,8 @@ bool Ver4PatriciaTrieNodeWriter::writePtNodeAndAdvancePosition( bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition( const PtNodeParams *const ptNodeParams, const int timestamp, int *const ptNodeWritingPos) { const PtNodeParams *const ptNodeParams, const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos) { int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; if (!writePtNodeAndGetTerminalIdAndAdvancePosition(ptNodeParams, &terminalId, ptNodeWritingPos)) { Loading @@ -213,7 +217,7 @@ bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition( // Write probability. ProbabilityEntry newProbabilityEntry; const ProbabilityEntry probabilityEntryToWrite = createUpdatedEntryFrom( &newProbabilityEntry, ptNodeParams->getProbability(), timestamp); &newProbabilityEntry, unigramProperty); return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(terminalId, &probabilityEntryToWrite); } Loading Loading @@ -379,18 +383,20 @@ bool Ver4PatriciaTrieNodeWriter::writePtNodeAndGetTerminalIdAndAdvancePosition( } const ProbabilityEntry Ver4PatriciaTrieNodeWriter::createUpdatedEntryFrom( const ProbabilityEntry *const originalProbabilityEntry, const int newProbability, const int timestamp) const { const ProbabilityEntry *const originalProbabilityEntry, const UnigramProperty *const unigramProperty) const { // TODO: Consolidate historical info and probability. if (mHeaderPolicy->hasHistoricalInfoOfWords()) { const HistoricalInfo updatedHistoricalInfo = ForgettingCurveUtils::createUpdatedHistoricalInfo( originalProbabilityEntry->getHistoricalInfo(), newProbability, timestamp, originalProbabilityEntry->getHistoricalInfo(), unigramProperty->getProbability(), unigramProperty->getTimestamp(), mHeaderPolicy); return originalProbabilityEntry->createEntryWithUpdatedHistoricalInfo( &updatedHistoricalInfo); } else { return originalProbabilityEntry->createEntryWithUpdatedProbability(newProbability); return originalProbabilityEntry->createEntryWithUpdatedProbability( unigramProperty->getProbability()); } } Loading native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h +7 −6 Original line number Diff line number Diff line Loading @@ -57,8 +57,8 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter { virtual bool markPtNodeAsWillBecomeNonTerminal( const PtNodeParams *const toBeUpdatedPtNodeParams); virtual bool updatePtNodeProbability(const PtNodeParams *const toBeUpdatedPtNodeParams, const int newProbability, const int timestamp); virtual bool updatePtNodeUnigramProperty(const PtNodeParams *const toBeUpdatedPtNodeParams, const UnigramProperty *const unigramProperty); virtual bool updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC( const PtNodeParams *const toBeUpdatedPtNodeParams, bool *const outNeedsToKeepPtNode); Loading @@ -73,7 +73,7 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter { int *const ptNodeWritingPos); virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams, const int timestamp, int *const ptNodeWritingPos); const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos); virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam, const int probability, const int timestamp, Loading Loading @@ -102,11 +102,12 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter { const PtNodeParams *const ptNodeParams, int *const outTerminalId, int *const ptNodeWritingPos); // Create updated probability entry using given probability and timestamp. In addition to the // Create updated probability entry using given unigram property. In addition to the // probability, this method updates historical information if needed. // TODO: Update flags belonging to the unigram property. const ProbabilityEntry createUpdatedEntryFrom( const ProbabilityEntry *const originalProbabilityEntry, const int newProbability, const int timestamp) const; const ProbabilityEntry *const originalProbabilityEntry, const UnigramProperty *const unigramProperty) const; bool updatePtNodeFlags(const int ptNodePos, const bool isBlacklisted, const bool isNotAWord, const bool isTerminal, const bool hasShortcutTargets, const bool hasBigrams, Loading Loading
native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp +41 −44 Original line number Diff line number Diff line Loading @@ -16,6 +16,7 @@ #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h" #include "suggest/core/dictionary/property/unigram_property.h" #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h" #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h" #include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h" Loading @@ -29,9 +30,8 @@ const int DynamicPtUpdatingHelper::CHILDREN_POSITION_FIELD_SIZE = 3; bool DynamicPtUpdatingHelper::addUnigramWord( DynamicPtReadingHelper *const readingHelper, const int *const wordCodePoints, const int codePointCount, const int probability, const bool isNotAWord, const bool isBlacklisted, const int timestamp, bool *const outAddedNewUnigram) { const int *const wordCodePoints, const int codePointCount, const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram) { int parentPos = NOT_A_DICT_POS; while (!readingHelper->isEnd()) { const PtNodeParams ptNodeParams(readingHelper->getPtNodeParams()); Loading @@ -53,20 +53,18 @@ bool DynamicPtUpdatingHelper::addUnigramWord( if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(ptNodeParams, j, wordCodePoints[matchedCodePointCount + j])) { *outAddedNewUnigram = true; return reallocatePtNodeAndAddNewPtNodes(&ptNodeParams, j, isNotAWord, isBlacklisted, probability, timestamp, wordCodePoints + matchedCodePointCount, return reallocatePtNodeAndAddNewPtNodes(&ptNodeParams, j, unigramProperty, wordCodePoints + matchedCodePointCount, codePointCount - matchedCodePointCount); } } // All characters are matched. if (codePointCount == readingHelper->getTotalCodePointCount(ptNodeParams)) { return setPtNodeProbability(&ptNodeParams, isNotAWord, isBlacklisted, probability, timestamp, outAddedNewUnigram); return setPtNodeProbability(&ptNodeParams, unigramProperty, outAddedNewUnigram); } if (!ptNodeParams.hasChildren()) { *outAddedNewUnigram = true; return createChildrenPtNodeArrayAndAChildPtNode(&ptNodeParams, isNotAWord, isBlacklisted, probability, timestamp, return createChildrenPtNodeArrayAndAChildPtNode(&ptNodeParams, unigramProperty, wordCodePoints + readingHelper->getTotalCodePointCount(ptNodeParams), codePointCount - readingHelper->getTotalCodePointCount(ptNodeParams)); } Loading @@ -83,7 +81,7 @@ bool DynamicPtUpdatingHelper::addUnigramWord( return createAndInsertNodeIntoPtNodeArray(parentPos, wordCodePoints + readingHelper->getPrevTotalCodePointCount(), codePointCount - readingHelper->getPrevTotalCodePointCount(), isNotAWord, isBlacklisted, probability, timestamp, &pos); unigramProperty, &pos); } bool DynamicPtUpdatingHelper::addBigramWords(const int word0Pos, const int word1Pos, Loading Loading @@ -115,36 +113,34 @@ bool DynamicPtUpdatingHelper::addShortcutTarget(const int wordPos, bool DynamicPtUpdatingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints, const int nodeCodePointCount, const bool isNotAWord, const bool isBlacklisted, const int probability, const int timestamp, int *const forwardLinkFieldPos) { const UnigramProperty *const unigramProperty, int *const forwardLinkFieldPos) { const int newPtNodeArrayPos = mBuffer->getTailPosition(); if (!DynamicPtWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer, newPtNodeArrayPos, forwardLinkFieldPos)) { return false; } return createNewPtNodeArrayWithAChildPtNode(parentPos, nodeCodePoints, nodeCodePointCount, isNotAWord, isBlacklisted, probability, timestamp); unigramProperty); } bool DynamicPtUpdatingHelper::setPtNodeProbability( const PtNodeParams *const originalPtNodeParams, const bool isNotAWord, const bool isBlacklisted, const int probability, const int timestamp, bool *const outAddedNewUnigram) { bool DynamicPtUpdatingHelper::setPtNodeProbability(const PtNodeParams *const originalPtNodeParams, const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram) { if (originalPtNodeParams->isTerminal()) { // Overwrites the probability. *outAddedNewUnigram = false; return mPtNodeWriter->updatePtNodeProbability(originalPtNodeParams, probability, timestamp); return mPtNodeWriter->updatePtNodeUnigramProperty(originalPtNodeParams, unigramProperty); } else { // Make the node terminal and write the probability. *outAddedNewUnigram = true; const int movedPos = mBuffer->getTailPosition(); int writingPos = movedPos; const PtNodeParams ptNodeParamsToWrite(getUpdatedPtNodeParams(originalPtNodeParams, isNotAWord, isBlacklisted, true /* isTerminal */, originalPtNodeParams->getParentPos(), originalPtNodeParams->getCodePointCount(), originalPtNodeParams->getCodePoints(), probability)); unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(), true /* isTerminal */, originalPtNodeParams->getParentPos(), originalPtNodeParams->getCodePointCount(), originalPtNodeParams->getCodePoints(), unigramProperty->getProbability())); if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite, timestamp, &writingPos)) { unigramProperty, &writingPos)) { return false; } if (!mPtNodeWriter->markPtNodeAsMoved(originalPtNodeParams, movedPos, movedPos)) { Loading @@ -155,31 +151,30 @@ bool DynamicPtUpdatingHelper::setPtNodeProbability( } bool DynamicPtUpdatingHelper::createChildrenPtNodeArrayAndAChildPtNode( const PtNodeParams *const parentPtNodeParams, const bool isNotAWord, const bool isBlacklisted, const int probability, const int timestamp, const PtNodeParams *const parentPtNodeParams, const UnigramProperty *const unigramProperty, const int *const codePoints, const int codePointCount) { const int newPtNodeArrayPos = mBuffer->getTailPosition(); if (!mPtNodeWriter->updateChildrenPosition(parentPtNodeParams, newPtNodeArrayPos)) { return false; } return createNewPtNodeArrayWithAChildPtNode(parentPtNodeParams->getHeadPos(), codePoints, codePointCount, isNotAWord, isBlacklisted, probability, timestamp); codePointCount, unigramProperty); } bool DynamicPtUpdatingHelper::createNewPtNodeArrayWithAChildPtNode( const int parentPtNodePos, const int *const nodeCodePoints, const int nodeCodePointCount, const bool isNotAWord, const bool isBlacklisted, const int probability, const int timestamp) { const UnigramProperty *const unigramProperty) { int writingPos = mBuffer->getTailPosition(); if (!DynamicPtWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer, 1 /* arraySize */, &writingPos)) { return false; } const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode( isNotAWord, isBlacklisted, true /* isTerminal */, parentPtNodePos, nodeCodePointCount, nodeCodePoints, probability)); if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite, timestamp, &writingPos)) { unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(), true /* isTerminal */, parentPtNodePos, nodeCodePointCount, nodeCodePoints, unigramProperty->getProbability())); if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite, unigramProperty, &writingPos)) { return false; } if (!DynamicPtWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer, Loading @@ -192,13 +187,13 @@ bool DynamicPtUpdatingHelper::createNewPtNodeArrayWithAChildPtNode( // Returns whether the dictionary updating was succeeded or not. bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes( const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount, const bool isNotAWord, const bool isBlacklisted, const int probabilityOfNewPtNode, const int timestamp, const int *const newNodeCodePoints, const int newNodeCodePointCount) { const UnigramProperty *const unigramProperty, const int *const newNodeCodePoints, const int newNodeCodePointCount) { // When addsExtraChild is true, split the reallocating PtNode and add new child. // Reallocating PtNode: abcde, newNode: abcxy. // abc (1st, not terminal) __ de (2nd) // \_ xy (extra child, terminal) // Otherwise, this method makes 1st part terminal and write probabilityOfNewPtNode. // Otherwise, this method makes 1st part terminal and write information in unigramProperty. // Reallocating PtNode: abcde, newNode: abc. // abc (1st, terminal) __ de (2nd) const bool addsExtraChild = newNodeCodePointCount > overlappingCodePointCount; Loading @@ -216,11 +211,12 @@ bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes( } } else { const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode( isNotAWord, isBlacklisted, true /* isTerminal */, reallocatingPtNodeParams->getParentPos(), overlappingCodePointCount, reallocatingPtNodeParams->getCodePoints(), probabilityOfNewPtNode)); unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(), true /* isTerminal */, reallocatingPtNodeParams->getParentPos(), overlappingCodePointCount, reallocatingPtNodeParams->getCodePoints(), unigramProperty->getProbability())); if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite, timestamp, &writingPos)) { unigramProperty, &writingPos)) { return false; } } Loading @@ -244,11 +240,12 @@ bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes( } if (addsExtraChild) { const PtNodeParams extraChildPtNodeParams(getPtNodeParamsForNewPtNode( isNotAWord, isBlacklisted, true /* isTerminal */, firstPartOfReallocatedPtNodePos, newNodeCodePointCount - overlappingCodePointCount, newNodeCodePoints + overlappingCodePointCount, probabilityOfNewPtNode)); unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(), true /* isTerminal */, firstPartOfReallocatedPtNodePos, newNodeCodePointCount - overlappingCodePointCount, newNodeCodePoints + overlappingCodePointCount, unigramProperty->getProbability())); if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&extraChildPtNodeParams, timestamp, &writingPos)) { unigramProperty, &writingPos)) { return false; } } Loading @@ -269,8 +266,8 @@ bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes( } const PtNodeParams DynamicPtUpdatingHelper::getUpdatedPtNodeParams( const PtNodeParams *const originalPtNodeParams, const bool isNotAWord, const bool isBlacklisted, const bool isTerminal, const int parentPos, const PtNodeParams *const originalPtNodeParams, const bool isNotAWord, const bool isBlacklisted, const bool isTerminal, const int parentPos, const int codePointCount, const int *const codePoints, const int probability) const { const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags( isBlacklisted, isNotAWord, isTerminal, originalPtNodeParams->hasShortcutTargets(), Loading
native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h +11 −14 Original line number Diff line number Diff line Loading @@ -26,6 +26,7 @@ class BufferWithExtendableBuffer; class DynamicPtReadingHelper; class PtNodeReader; class PtNodeWriter; class UnigramProperty; class DynamicPtUpdatingHelper { public: Loading @@ -37,9 +38,8 @@ class DynamicPtUpdatingHelper { // Add a word to the dictionary. If the word already exists, update the probability. bool addUnigramWord(DynamicPtReadingHelper *const readingHelper, const int *const wordCodePoints, const int codePointCount, const int probability, const bool isNotAWord, const bool isBlacklisted, const int timestamp, bool *const outAddedNewUnigram); const int *const wordCodePoints, const int codePointCount, const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram); // Add a bigram relation from word0Pos to word1Pos. bool addBigramWords(const int word0Pos, const int word1Pos, const int probability, Loading @@ -62,25 +62,22 @@ class DynamicPtUpdatingHelper { PtNodeWriter *const mPtNodeWriter; bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints, const int nodeCodePointCount, const bool isNotAWord, const bool isBlacklisted, const int probability, const int timestamp, int *const forwardLinkFieldPos); const int nodeCodePointCount, const UnigramProperty *const unigramProperty, int *const forwardLinkFieldPos); bool setPtNodeProbability(const PtNodeParams *const originalPtNodeParams, const bool isNotAWord, const bool isBlacklisted, const int probability, const int timestamp, bool *const outAddedNewUnigram); bool setPtNodeProbability(const PtNodeParams *const originalPtNodeParams, const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram); bool createChildrenPtNodeArrayAndAChildPtNode(const PtNodeParams *const parentPtNodeParams, const bool isNotAWord, const bool isBlacklisted, const int probability, const int timestamp, const int *const codePoints, const int codePointCount); const UnigramProperty *const unigramProperty, const int *const codePoints, const int codePointCount); bool createNewPtNodeArrayWithAChildPtNode(const int parentPos, const int *const nodeCodePoints, const int nodeCodePointCount, const bool isNotAWord, const bool isBlacklisted, const int probability, const int timestamp); const int nodeCodePointCount, const UnigramProperty *const unigramProperty); bool reallocatePtNodeAndAddNewPtNodes( const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount, const bool isNotAWord, const bool isBlacklisted, const int probabilityOfNewPtNode, const int timestamp, const int *const newNodeCodePoints, const UnigramProperty *const unigramProperty, const int *const newNodeCodePoints, const int newNodeCodePointCount); const PtNodeParams getUpdatedPtNodeParams(const PtNodeParams *const originalPtNodeParams, Loading
native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h +5 −3 Original line number Diff line number Diff line Loading @@ -24,6 +24,8 @@ namespace latinime { class UnigramProperty; // Interface class used to write PtNode information. class PtNodeWriter { public: Loading Loading @@ -51,8 +53,8 @@ class PtNodeWriter { virtual bool markPtNodeAsWillBecomeNonTerminal( const PtNodeParams *const toBeUpdatedPtNodeParams) = 0; virtual bool updatePtNodeProbability(const PtNodeParams *const toBeUpdatedPtNodeParams, const int probability, const int timestamp) = 0; virtual bool updatePtNodeUnigramProperty(const PtNodeParams *const toBeUpdatedPtNodeParams, const UnigramProperty *const unigramProperty) = 0; virtual bool updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC( const PtNodeParams *const toBeUpdatedPtNodeParams, Loading @@ -65,7 +67,7 @@ class PtNodeWriter { int *const ptNodeWritingPos) = 0; virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams, const int timestamp, int *const ptNodeWritingPos) = 0; const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos) = 0; virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam, const int probability, const int timestamp, Loading
native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.cpp +16 −10 Original line number Diff line number Diff line Loading @@ -16,6 +16,7 @@ #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h" #include "suggest/core/dictionary/property/unigram_property.h" #include "suggest/policyimpl/dictionary/bigram/ver4_bigram_list_policy.h" #include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/shortcut/ver4_shortcut_list_policy.h" Loading Loading @@ -133,9 +134,11 @@ bool Ver4PatriciaTrieNodeWriter::markPtNodeAsWillBecomeNonTerminal( &writingPos); } bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbability( const PtNodeParams *const toBeUpdatedPtNodeParams, const int newProbability, const int timestamp) { bool Ver4PatriciaTrieNodeWriter::updatePtNodeUnigramProperty( const PtNodeParams *const toBeUpdatedPtNodeParams, const UnigramProperty *const unigramProperty) { // Update probability and historical information. // TODO: Update other information in the unigram property. if (!toBeUpdatedPtNodeParams->isTerminal()) { return false; } Loading @@ -143,7 +146,7 @@ bool Ver4PatriciaTrieNodeWriter::updatePtNodeProbability( mBuffers->getProbabilityDictContent()->getProbabilityEntry( toBeUpdatedPtNodeParams->getTerminalId()); const ProbabilityEntry probabilityEntry = createUpdatedEntryFrom(&originalProbabilityEntry, newProbability, timestamp); unigramProperty); return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry( toBeUpdatedPtNodeParams->getTerminalId(), &probabilityEntry); } Loading Loading @@ -204,7 +207,8 @@ bool Ver4PatriciaTrieNodeWriter::writePtNodeAndAdvancePosition( bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition( const PtNodeParams *const ptNodeParams, const int timestamp, int *const ptNodeWritingPos) { const PtNodeParams *const ptNodeParams, const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos) { int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; if (!writePtNodeAndGetTerminalIdAndAdvancePosition(ptNodeParams, &terminalId, ptNodeWritingPos)) { Loading @@ -213,7 +217,7 @@ bool Ver4PatriciaTrieNodeWriter::writeNewTerminalPtNodeAndAdvancePosition( // Write probability. ProbabilityEntry newProbabilityEntry; const ProbabilityEntry probabilityEntryToWrite = createUpdatedEntryFrom( &newProbabilityEntry, ptNodeParams->getProbability(), timestamp); &newProbabilityEntry, unigramProperty); return mBuffers->getMutableProbabilityDictContent()->setProbabilityEntry(terminalId, &probabilityEntryToWrite); } Loading Loading @@ -379,18 +383,20 @@ bool Ver4PatriciaTrieNodeWriter::writePtNodeAndGetTerminalIdAndAdvancePosition( } const ProbabilityEntry Ver4PatriciaTrieNodeWriter::createUpdatedEntryFrom( const ProbabilityEntry *const originalProbabilityEntry, const int newProbability, const int timestamp) const { const ProbabilityEntry *const originalProbabilityEntry, const UnigramProperty *const unigramProperty) const { // TODO: Consolidate historical info and probability. if (mHeaderPolicy->hasHistoricalInfoOfWords()) { const HistoricalInfo updatedHistoricalInfo = ForgettingCurveUtils::createUpdatedHistoricalInfo( originalProbabilityEntry->getHistoricalInfo(), newProbability, timestamp, originalProbabilityEntry->getHistoricalInfo(), unigramProperty->getProbability(), unigramProperty->getTimestamp(), mHeaderPolicy); return originalProbabilityEntry->createEntryWithUpdatedHistoricalInfo( &updatedHistoricalInfo); } else { return originalProbabilityEntry->createEntryWithUpdatedProbability(newProbability); return originalProbabilityEntry->createEntryWithUpdatedProbability( unigramProperty->getProbability()); } } Loading
native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h +7 −6 Original line number Diff line number Diff line Loading @@ -57,8 +57,8 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter { virtual bool markPtNodeAsWillBecomeNonTerminal( const PtNodeParams *const toBeUpdatedPtNodeParams); virtual bool updatePtNodeProbability(const PtNodeParams *const toBeUpdatedPtNodeParams, const int newProbability, const int timestamp); virtual bool updatePtNodeUnigramProperty(const PtNodeParams *const toBeUpdatedPtNodeParams, const UnigramProperty *const unigramProperty); virtual bool updatePtNodeProbabilityAndGetNeedsToKeepPtNodeAfterGC( const PtNodeParams *const toBeUpdatedPtNodeParams, bool *const outNeedsToKeepPtNode); Loading @@ -73,7 +73,7 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter { int *const ptNodeWritingPos); virtual bool writeNewTerminalPtNodeAndAdvancePosition(const PtNodeParams *const ptNodeParams, const int timestamp, int *const ptNodeWritingPos); const UnigramProperty *const unigramProperty, int *const ptNodeWritingPos); virtual bool addNewBigramEntry(const PtNodeParams *const sourcePtNodeParams, const PtNodeParams *const targetPtNodeParam, const int probability, const int timestamp, Loading Loading @@ -102,11 +102,12 @@ class Ver4PatriciaTrieNodeWriter : public PtNodeWriter { const PtNodeParams *const ptNodeParams, int *const outTerminalId, int *const ptNodeWritingPos); // Create updated probability entry using given probability and timestamp. In addition to the // Create updated probability entry using given unigram property. In addition to the // probability, this method updates historical information if needed. // TODO: Update flags belonging to the unigram property. const ProbabilityEntry createUpdatedEntryFrom( const ProbabilityEntry *const originalProbabilityEntry, const int newProbability, const int timestamp) const; const ProbabilityEntry *const originalProbabilityEntry, const UnigramProperty *const unigramProperty) const; bool updatePtNodeFlags(const int ptNodePos, const bool isBlacklisted, const bool isNotAWord, const bool isTerminal, const bool hasShortcutTargets, const bool hasBigrams, Loading