Loading native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp +12 −8 Original line number Diff line number Diff line Loading @@ -30,6 +30,7 @@ const char *const HeaderPolicy::DATE_KEY = "date"; const char *const HeaderPolicy::LAST_DECAYED_TIME_KEY = "LAST_DECAYED_TIME"; const char *const HeaderPolicy::UNIGRAM_COUNT_KEY = "UNIGRAM_COUNT"; const char *const HeaderPolicy::BIGRAM_COUNT_KEY = "BIGRAM_COUNT"; const char *const HeaderPolicy::TRIGRAM_COUNT_KEY = "TRIGRAM_COUNT"; const char *const HeaderPolicy::EXTENDED_REGION_SIZE_KEY = "EXTENDED_REGION_SIZE"; // Historical info is information that is needed to support decaying such as timestamp, level and // count. Loading Loading @@ -94,12 +95,11 @@ bool HeaderPolicy::readRequiresGermanUmlautProcessing() const { } bool HeaderPolicy::fillInAndWriteHeaderToBuffer(const bool updatesLastDecayedTime, const int unigramCount, const int bigramCount, const int extendedRegionSize, BufferWithExtendableBuffer *const outBuffer) const { const EntryCounts &entryCounts, const int extendedRegionSize, BufferWithExtendableBuffer *const outBuffer) const { int writingPos = 0; DictionaryHeaderStructurePolicy::AttributeMap attributeMapToWrite(mAttributeMap); fillInHeader(updatesLastDecayedTime, unigramCount, bigramCount, extendedRegionSize, &attributeMapToWrite); fillInHeader(updatesLastDecayedTime, entryCounts, extendedRegionSize, &attributeMapToWrite); if (!HeaderReadWriteUtils::writeDictionaryVersion(outBuffer, mDictFormatVersion, &writingPos)) { return false; Loading @@ -126,11 +126,15 @@ bool HeaderPolicy::fillInAndWriteHeaderToBuffer(const bool updatesLastDecayedTim return true; } void HeaderPolicy::fillInHeader(const bool updatesLastDecayedTime, const int unigramCount, const int bigramCount, const int extendedRegionSize, void HeaderPolicy::fillInHeader(const bool updatesLastDecayedTime, const EntryCounts &entryCounts, const int extendedRegionSize, DictionaryHeaderStructurePolicy::AttributeMap *outAttributeMap) const { HeaderReadWriteUtils::setIntAttribute(outAttributeMap, UNIGRAM_COUNT_KEY, unigramCount); HeaderReadWriteUtils::setIntAttribute(outAttributeMap, BIGRAM_COUNT_KEY, bigramCount); HeaderReadWriteUtils::setIntAttribute(outAttributeMap, UNIGRAM_COUNT_KEY, entryCounts.getUnigramCount()); HeaderReadWriteUtils::setIntAttribute(outAttributeMap, BIGRAM_COUNT_KEY, entryCounts.getBigramCount()); HeaderReadWriteUtils::setIntAttribute(outAttributeMap, TRIGRAM_COUNT_KEY, entryCounts.getTrigramCount()); HeaderReadWriteUtils::setIntAttribute(outAttributeMap, EXTENDED_REGION_SIZE_KEY, extendedRegionSize); // Set the current time as the generation time. Loading native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h +27 −7 Original line number Diff line number Diff line Loading @@ -22,6 +22,7 @@ #include "defines.h" #include "suggest/core/policy/dictionary_header_structure_policy.h" #include "suggest/policyimpl/dictionary/header/header_read_write_utils.h" #include "suggest/policyimpl/dictionary/utils/entry_counters.h" #include "suggest/policyimpl/dictionary/utils/format_utils.h" #include "utils/char_utils.h" #include "utils/time_keeper.h" Loading Loading @@ -49,6 +50,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { UNIGRAM_COUNT_KEY, 0 /* defaultValue */)), mBigramCount(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, BIGRAM_COUNT_KEY, 0 /* defaultValue */)), mTrigramCount(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, TRIGRAM_COUNT_KEY, 0 /* defaultValue */)), mExtendedRegionSize(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, EXTENDED_REGION_SIZE_KEY, 0 /* defaultValue */)), mHasHistoricalInfoOfWords(HeaderReadWriteUtils::readBoolAttributeValue( Loading @@ -60,6 +63,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { &mAttributeMap, MAX_UNIGRAM_COUNT_KEY, DEFAULT_MAX_UNIGRAM_COUNT)), mMaxBigramCount(HeaderReadWriteUtils::readIntAttributeValue( &mAttributeMap, MAX_BIGRAM_COUNT_KEY, DEFAULT_MAX_BIGRAM_COUNT)), mMaxTrigramCount(HeaderReadWriteUtils::readIntAttributeValue( &mAttributeMap, MAX_TRIGRAM_COUNT_KEY, DEFAULT_MAX_TRIGRAM_COUNT)), mCodePointTable(HeaderReadWriteUtils::readCodePointTable(&mAttributeMap)) {} // Constructs header information using an attribute map. Loading @@ -77,7 +82,7 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { DATE_KEY, TimeKeeper::peekCurrentTime() /* defaultValue */)), mLastDecayedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, DATE_KEY, TimeKeeper::peekCurrentTime() /* defaultValue */)), mUnigramCount(0), mBigramCount(0), mExtendedRegionSize(0), mUnigramCount(0), mBigramCount(0), mTrigramCount(0), mExtendedRegionSize(0), mHasHistoricalInfoOfWords(HeaderReadWriteUtils::readBoolAttributeValue( &mAttributeMap, HAS_HISTORICAL_INFO_KEY, false /* defaultValue */)), mForgettingCurveProbabilityValuesTableId(HeaderReadWriteUtils::readIntAttributeValue( Loading @@ -87,6 +92,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { &mAttributeMap, MAX_UNIGRAM_COUNT_KEY, DEFAULT_MAX_UNIGRAM_COUNT)), mMaxBigramCount(HeaderReadWriteUtils::readIntAttributeValue( &mAttributeMap, MAX_BIGRAM_COUNT_KEY, DEFAULT_MAX_BIGRAM_COUNT)), mMaxTrigramCount(HeaderReadWriteUtils::readIntAttributeValue( &mAttributeMap, MAX_TRIGRAM_COUNT_KEY, DEFAULT_MAX_TRIGRAM_COUNT)), mCodePointTable(HeaderReadWriteUtils::readCodePointTable(&mAttributeMap)) {} // Copy header information Loading @@ -99,12 +106,14 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { mIsDecayingDict(headerPolicy->mIsDecayingDict), mDate(headerPolicy->mDate), mLastDecayedTime(headerPolicy->mLastDecayedTime), mUnigramCount(headerPolicy->mUnigramCount), mBigramCount(headerPolicy->mBigramCount), mTrigramCount(headerPolicy->mTrigramCount), mExtendedRegionSize(headerPolicy->mExtendedRegionSize), mHasHistoricalInfoOfWords(headerPolicy->mHasHistoricalInfoOfWords), mForgettingCurveProbabilityValuesTableId( headerPolicy->mForgettingCurveProbabilityValuesTableId), mMaxUnigramCount(headerPolicy->mMaxUnigramCount), mMaxBigramCount(headerPolicy->mMaxBigramCount), mMaxTrigramCount(headerPolicy->mMaxTrigramCount), mCodePointTable(headerPolicy->mCodePointTable) {} // Temporary dummy header. Loading @@ -112,10 +121,10 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { : mDictFormatVersion(FormatUtils::UNKNOWN_VERSION), mDictionaryFlags(0), mSize(0), mAttributeMap(), mLocale(CharUtils::EMPTY_STRING), mMultiWordCostMultiplier(0.0f), mRequiresGermanUmlautProcessing(false), mIsDecayingDict(false), mDate(0), mLastDecayedTime(0), mUnigramCount(0), mBigramCount(0), mDate(0), mLastDecayedTime(0), mUnigramCount(0), mBigramCount(0), mTrigramCount(0), mExtendedRegionSize(0), mHasHistoricalInfoOfWords(false), mForgettingCurveProbabilityValuesTableId(0), mMaxUnigramCount(0), mMaxBigramCount(0), mCodePointTable(nullptr) {} mMaxTrigramCount(0), mCodePointTable(nullptr) {} ~HeaderPolicy() {} Loading Loading @@ -183,6 +192,10 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { return mBigramCount; } AK_FORCE_INLINE int getTrigramCount() const { return mTrigramCount; } AK_FORCE_INLINE int getExtendedRegionSize() const { return mExtendedRegionSize; } Loading Loading @@ -212,15 +225,19 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { return mMaxBigramCount; } AK_FORCE_INLINE int getMaxTrigramCount() const { return mMaxTrigramCount; } void readHeaderValueOrQuestionMark(const char *const key, int *outValue, int outValueSize) const; bool fillInAndWriteHeaderToBuffer(const bool updatesLastDecayedTime, const int unigramCount, const int bigramCount, const int extendedRegionSize, BufferWithExtendableBuffer *const outBuffer) const; const EntryCounts &entryCounts, const int extendedRegionSize, BufferWithExtendableBuffer *const outBuffer) const; void fillInHeader(const bool updatesLastDecayedTime, const int unigramCount, const int bigramCount, const int extendedRegionSize, void fillInHeader(const bool updatesLastDecayedTime, const EntryCounts &entryCounts, const int extendedRegionSize, DictionaryHeaderStructurePolicy::AttributeMap *outAttributeMap) const; AK_FORCE_INLINE const std::vector<int> *getLocale() const { Loading @@ -245,6 +262,7 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { static const char *const LAST_DECAYED_TIME_KEY; static const char *const UNIGRAM_COUNT_KEY; static const char *const BIGRAM_COUNT_KEY; static const char *const TRIGRAM_COUNT_KEY; static const char *const EXTENDED_REGION_SIZE_KEY; static const char *const HAS_HISTORICAL_INFO_KEY; static const char *const LOCALE_KEY; Loading Loading @@ -273,11 +291,13 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { const int mLastDecayedTime; const int mUnigramCount; const int mBigramCount; const int mTrigramCount; const int mExtendedRegionSize; const bool mHasHistoricalInfoOfWords; const int mForgettingCurveProbabilityValuesTableId; const int mMaxUnigramCount; const int mMaxBigramCount; const int mMaxTrigramCount; const int *const mCodePointTable; const std::vector<int> readLocale() const; Loading native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp +9 −9 Original line number Diff line number Diff line Loading @@ -303,7 +303,7 @@ bool Ver4PatriciaTriePolicy::addUnigramEntry(const CodePointArrayView wordCodePo if (mUpdatingHelper.addUnigramWord(&readingHelper, codePointArrayView, unigramProperty, &addedNewUnigram)) { if (addedNewUnigram && !unigramProperty->representsBeginningOfSentence()) { mUnigramCount++; mEntryCounters.incrementUnigramCount(); } if (unigramProperty->getShortcuts().size() > 0) { // Add shortcut target. Loading Loading @@ -397,7 +397,7 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const NgramContext *const ngramContex if (mUpdatingHelper.addNgramEntry(PtNodePosArrayView::singleElementView(&prevWordPtNodePos), wordPos, ngramProperty, &addedNewBigram)) { if (addedNewBigram) { mBigramCount++; mEntryCounters.incrementBigramCount(); } return true; } else { Loading Loading @@ -438,7 +438,7 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const NgramContext *const ngramCon const int prevWordPtNodePos = getTerminalPtNodePosFromWordId(prevWordIds[0]); if (mUpdatingHelper.removeNgramEntry( PtNodePosArrayView::singleElementView(&prevWordPtNodePos), wordPos)) { mBigramCount--; mEntryCounters.decrementBigramCount(); return true; } else { return false; Loading Loading @@ -477,7 +477,7 @@ bool Ver4PatriciaTriePolicy::flush(const char *const filePath) { AKLOGI("Warning: flush() is called for non-updatable dictionary. filePath: %s", filePath); return false; } if (!mWritingHelper.writeToDictFile(filePath, mUnigramCount, mBigramCount)) { if (!mWritingHelper.writeToDictFile(filePath, mEntryCounters.getEntryCounts())) { AKLOGE("Cannot flush the dictionary to file."); mIsCorrupted = true; return false; Loading Loading @@ -515,7 +515,7 @@ bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const { // Needs to reduce dictionary size. return true; } else if (mHeaderPolicy->isDecayingDict()) { return ForgettingCurveUtils::needsToDecay(mindsBlockByGC, mUnigramCount, mBigramCount, return ForgettingCurveUtils::needsToDecay(mindsBlockByGC, mEntryCounters.getEntryCounts(), mHeaderPolicy); } return false; Loading @@ -525,19 +525,19 @@ void Ver4PatriciaTriePolicy::getProperty(const char *const query, const int quer char *const outResult, const int maxResultLength) { const int compareLength = queryLength + 1 /* terminator */; if (strncmp(query, UNIGRAM_COUNT_QUERY, compareLength) == 0) { snprintf(outResult, maxResultLength, "%d", mUnigramCount); snprintf(outResult, maxResultLength, "%d", mEntryCounters.getUnigramCount()); } else if (strncmp(query, BIGRAM_COUNT_QUERY, compareLength) == 0) { snprintf(outResult, maxResultLength, "%d", mBigramCount); snprintf(outResult, maxResultLength, "%d", mEntryCounters.getBigramCount()); } else if (strncmp(query, MAX_UNIGRAM_COUNT_QUERY, compareLength) == 0) { snprintf(outResult, maxResultLength, "%d", mHeaderPolicy->isDecayingDict() ? ForgettingCurveUtils::getUnigramCountHardLimit( ForgettingCurveUtils::getEntryCountHardLimit( mHeaderPolicy->getMaxUnigramCount()) : static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE)); } else if (strncmp(query, MAX_BIGRAM_COUNT_QUERY, compareLength) == 0) { snprintf(outResult, maxResultLength, "%d", mHeaderPolicy->isDecayingDict() ? ForgettingCurveUtils::getBigramCountHardLimit( ForgettingCurveUtils::getEntryCountHardLimit( mHeaderPolicy->getMaxBigramCount()) : static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE)); } Loading native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h +4 −4 Original line number Diff line number Diff line Loading @@ -41,6 +41,7 @@ #include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.h" #include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_pt_node_array_reader.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" #include "suggest/policyimpl/dictionary/utils/entry_counters.h" #include "utils/int_array_view.h" namespace latinime { Loading Loading @@ -75,8 +76,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { &mPtNodeArrayReader, &mBigramPolicy, &mShortcutPolicy), mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter), mWritingHelper(mBuffers.get()), mUnigramCount(mHeaderPolicy->getUnigramCount()), mBigramCount(mHeaderPolicy->getBigramCount()), mEntryCounters(mHeaderPolicy->getUnigramCount(), mHeaderPolicy->getBigramCount(), mHeaderPolicy->getTrigramCount()), mTerminalPtNodePositionsForIteratingWords(), mIsCorrupted(false) {}; virtual int getRootPosition() const { Loading Loading @@ -163,8 +164,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { Ver4PatriciaTrieNodeWriter mNodeWriter; DynamicPtUpdatingHelper mUpdatingHelper; Ver4PatriciaTrieWritingHelper mWritingHelper; int mUnigramCount; int mBigramCount; MutableEntryCounters mEntryCounters; std::vector<int> mTerminalPtNodePositionsForIteratingWords; mutable bool mIsCorrupted; Loading native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.cpp +6 −5 Original line number Diff line number Diff line Loading @@ -43,18 +43,18 @@ namespace backward { namespace v402 { bool Ver4PatriciaTrieWritingHelper::writeToDictFile(const char *const dictDirPath, const int unigramCount, const int bigramCount) const { const EntryCounts &entryCounts) const { const HeaderPolicy *const headerPolicy = mBuffers->getHeaderPolicy(); BufferWithExtendableBuffer headerBuffer( BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE); const int extendedRegionSize = headerPolicy->getExtendedRegionSize() + mBuffers->getTrieBuffer()->getUsedAdditionalBufferSize(); if (!headerPolicy->fillInAndWriteHeaderToBuffer(false /* updatesLastDecayedTime */, unigramCount, bigramCount, extendedRegionSize, &headerBuffer)) { entryCounts, extendedRegionSize, &headerBuffer)) { AKLOGE("Cannot write header structure to buffer. " "updatesLastDecayedTime: %d, unigramCount: %d, bigramCount: %d, " "extendedRegionSize: %d", false, unigramCount, bigramCount, extendedRegionSize); "extendedRegionSize: %d", false, entryCounters.getUnigramCount(), entryCounters.getBigramCount(), extendedRegionSize); return false; } return mBuffers->flushHeaderAndDictBuffers(dictDirPath, &headerBuffer); Loading @@ -74,7 +74,8 @@ bool Ver4PatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeAr BufferWithExtendableBuffer headerBuffer( BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE); if (!headerPolicy->fillInAndWriteHeaderToBuffer(true /* updatesLastDecayedTime */, unigramCount, bigramCount, 0 /* extendedRegionSize */, &headerBuffer)) { EntryCounts(unigramCount, bigramCount, 0 /* trigramCount */), 0 /* extendedRegionSize */, &headerBuffer)) { return false; } return dictBuffers->flushHeaderAndDictBuffers(dictDirPath, &headerBuffer); Loading Loading
native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp +12 −8 Original line number Diff line number Diff line Loading @@ -30,6 +30,7 @@ const char *const HeaderPolicy::DATE_KEY = "date"; const char *const HeaderPolicy::LAST_DECAYED_TIME_KEY = "LAST_DECAYED_TIME"; const char *const HeaderPolicy::UNIGRAM_COUNT_KEY = "UNIGRAM_COUNT"; const char *const HeaderPolicy::BIGRAM_COUNT_KEY = "BIGRAM_COUNT"; const char *const HeaderPolicy::TRIGRAM_COUNT_KEY = "TRIGRAM_COUNT"; const char *const HeaderPolicy::EXTENDED_REGION_SIZE_KEY = "EXTENDED_REGION_SIZE"; // Historical info is information that is needed to support decaying such as timestamp, level and // count. Loading Loading @@ -94,12 +95,11 @@ bool HeaderPolicy::readRequiresGermanUmlautProcessing() const { } bool HeaderPolicy::fillInAndWriteHeaderToBuffer(const bool updatesLastDecayedTime, const int unigramCount, const int bigramCount, const int extendedRegionSize, BufferWithExtendableBuffer *const outBuffer) const { const EntryCounts &entryCounts, const int extendedRegionSize, BufferWithExtendableBuffer *const outBuffer) const { int writingPos = 0; DictionaryHeaderStructurePolicy::AttributeMap attributeMapToWrite(mAttributeMap); fillInHeader(updatesLastDecayedTime, unigramCount, bigramCount, extendedRegionSize, &attributeMapToWrite); fillInHeader(updatesLastDecayedTime, entryCounts, extendedRegionSize, &attributeMapToWrite); if (!HeaderReadWriteUtils::writeDictionaryVersion(outBuffer, mDictFormatVersion, &writingPos)) { return false; Loading @@ -126,11 +126,15 @@ bool HeaderPolicy::fillInAndWriteHeaderToBuffer(const bool updatesLastDecayedTim return true; } void HeaderPolicy::fillInHeader(const bool updatesLastDecayedTime, const int unigramCount, const int bigramCount, const int extendedRegionSize, void HeaderPolicy::fillInHeader(const bool updatesLastDecayedTime, const EntryCounts &entryCounts, const int extendedRegionSize, DictionaryHeaderStructurePolicy::AttributeMap *outAttributeMap) const { HeaderReadWriteUtils::setIntAttribute(outAttributeMap, UNIGRAM_COUNT_KEY, unigramCount); HeaderReadWriteUtils::setIntAttribute(outAttributeMap, BIGRAM_COUNT_KEY, bigramCount); HeaderReadWriteUtils::setIntAttribute(outAttributeMap, UNIGRAM_COUNT_KEY, entryCounts.getUnigramCount()); HeaderReadWriteUtils::setIntAttribute(outAttributeMap, BIGRAM_COUNT_KEY, entryCounts.getBigramCount()); HeaderReadWriteUtils::setIntAttribute(outAttributeMap, TRIGRAM_COUNT_KEY, entryCounts.getTrigramCount()); HeaderReadWriteUtils::setIntAttribute(outAttributeMap, EXTENDED_REGION_SIZE_KEY, extendedRegionSize); // Set the current time as the generation time. Loading
native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h +27 −7 Original line number Diff line number Diff line Loading @@ -22,6 +22,7 @@ #include "defines.h" #include "suggest/core/policy/dictionary_header_structure_policy.h" #include "suggest/policyimpl/dictionary/header/header_read_write_utils.h" #include "suggest/policyimpl/dictionary/utils/entry_counters.h" #include "suggest/policyimpl/dictionary/utils/format_utils.h" #include "utils/char_utils.h" #include "utils/time_keeper.h" Loading Loading @@ -49,6 +50,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { UNIGRAM_COUNT_KEY, 0 /* defaultValue */)), mBigramCount(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, BIGRAM_COUNT_KEY, 0 /* defaultValue */)), mTrigramCount(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, TRIGRAM_COUNT_KEY, 0 /* defaultValue */)), mExtendedRegionSize(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, EXTENDED_REGION_SIZE_KEY, 0 /* defaultValue */)), mHasHistoricalInfoOfWords(HeaderReadWriteUtils::readBoolAttributeValue( Loading @@ -60,6 +63,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { &mAttributeMap, MAX_UNIGRAM_COUNT_KEY, DEFAULT_MAX_UNIGRAM_COUNT)), mMaxBigramCount(HeaderReadWriteUtils::readIntAttributeValue( &mAttributeMap, MAX_BIGRAM_COUNT_KEY, DEFAULT_MAX_BIGRAM_COUNT)), mMaxTrigramCount(HeaderReadWriteUtils::readIntAttributeValue( &mAttributeMap, MAX_TRIGRAM_COUNT_KEY, DEFAULT_MAX_TRIGRAM_COUNT)), mCodePointTable(HeaderReadWriteUtils::readCodePointTable(&mAttributeMap)) {} // Constructs header information using an attribute map. Loading @@ -77,7 +82,7 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { DATE_KEY, TimeKeeper::peekCurrentTime() /* defaultValue */)), mLastDecayedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, DATE_KEY, TimeKeeper::peekCurrentTime() /* defaultValue */)), mUnigramCount(0), mBigramCount(0), mExtendedRegionSize(0), mUnigramCount(0), mBigramCount(0), mTrigramCount(0), mExtendedRegionSize(0), mHasHistoricalInfoOfWords(HeaderReadWriteUtils::readBoolAttributeValue( &mAttributeMap, HAS_HISTORICAL_INFO_KEY, false /* defaultValue */)), mForgettingCurveProbabilityValuesTableId(HeaderReadWriteUtils::readIntAttributeValue( Loading @@ -87,6 +92,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { &mAttributeMap, MAX_UNIGRAM_COUNT_KEY, DEFAULT_MAX_UNIGRAM_COUNT)), mMaxBigramCount(HeaderReadWriteUtils::readIntAttributeValue( &mAttributeMap, MAX_BIGRAM_COUNT_KEY, DEFAULT_MAX_BIGRAM_COUNT)), mMaxTrigramCount(HeaderReadWriteUtils::readIntAttributeValue( &mAttributeMap, MAX_TRIGRAM_COUNT_KEY, DEFAULT_MAX_TRIGRAM_COUNT)), mCodePointTable(HeaderReadWriteUtils::readCodePointTable(&mAttributeMap)) {} // Copy header information Loading @@ -99,12 +106,14 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { mIsDecayingDict(headerPolicy->mIsDecayingDict), mDate(headerPolicy->mDate), mLastDecayedTime(headerPolicy->mLastDecayedTime), mUnigramCount(headerPolicy->mUnigramCount), mBigramCount(headerPolicy->mBigramCount), mTrigramCount(headerPolicy->mTrigramCount), mExtendedRegionSize(headerPolicy->mExtendedRegionSize), mHasHistoricalInfoOfWords(headerPolicy->mHasHistoricalInfoOfWords), mForgettingCurveProbabilityValuesTableId( headerPolicy->mForgettingCurveProbabilityValuesTableId), mMaxUnigramCount(headerPolicy->mMaxUnigramCount), mMaxBigramCount(headerPolicy->mMaxBigramCount), mMaxTrigramCount(headerPolicy->mMaxTrigramCount), mCodePointTable(headerPolicy->mCodePointTable) {} // Temporary dummy header. Loading @@ -112,10 +121,10 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { : mDictFormatVersion(FormatUtils::UNKNOWN_VERSION), mDictionaryFlags(0), mSize(0), mAttributeMap(), mLocale(CharUtils::EMPTY_STRING), mMultiWordCostMultiplier(0.0f), mRequiresGermanUmlautProcessing(false), mIsDecayingDict(false), mDate(0), mLastDecayedTime(0), mUnigramCount(0), mBigramCount(0), mDate(0), mLastDecayedTime(0), mUnigramCount(0), mBigramCount(0), mTrigramCount(0), mExtendedRegionSize(0), mHasHistoricalInfoOfWords(false), mForgettingCurveProbabilityValuesTableId(0), mMaxUnigramCount(0), mMaxBigramCount(0), mCodePointTable(nullptr) {} mMaxTrigramCount(0), mCodePointTable(nullptr) {} ~HeaderPolicy() {} Loading Loading @@ -183,6 +192,10 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { return mBigramCount; } AK_FORCE_INLINE int getTrigramCount() const { return mTrigramCount; } AK_FORCE_INLINE int getExtendedRegionSize() const { return mExtendedRegionSize; } Loading Loading @@ -212,15 +225,19 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { return mMaxBigramCount; } AK_FORCE_INLINE int getMaxTrigramCount() const { return mMaxTrigramCount; } void readHeaderValueOrQuestionMark(const char *const key, int *outValue, int outValueSize) const; bool fillInAndWriteHeaderToBuffer(const bool updatesLastDecayedTime, const int unigramCount, const int bigramCount, const int extendedRegionSize, BufferWithExtendableBuffer *const outBuffer) const; const EntryCounts &entryCounts, const int extendedRegionSize, BufferWithExtendableBuffer *const outBuffer) const; void fillInHeader(const bool updatesLastDecayedTime, const int unigramCount, const int bigramCount, const int extendedRegionSize, void fillInHeader(const bool updatesLastDecayedTime, const EntryCounts &entryCounts, const int extendedRegionSize, DictionaryHeaderStructurePolicy::AttributeMap *outAttributeMap) const; AK_FORCE_INLINE const std::vector<int> *getLocale() const { Loading @@ -245,6 +262,7 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { static const char *const LAST_DECAYED_TIME_KEY; static const char *const UNIGRAM_COUNT_KEY; static const char *const BIGRAM_COUNT_KEY; static const char *const TRIGRAM_COUNT_KEY; static const char *const EXTENDED_REGION_SIZE_KEY; static const char *const HAS_HISTORICAL_INFO_KEY; static const char *const LOCALE_KEY; Loading Loading @@ -273,11 +291,13 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { const int mLastDecayedTime; const int mUnigramCount; const int mBigramCount; const int mTrigramCount; const int mExtendedRegionSize; const bool mHasHistoricalInfoOfWords; const int mForgettingCurveProbabilityValuesTableId; const int mMaxUnigramCount; const int mMaxBigramCount; const int mMaxTrigramCount; const int *const mCodePointTable; const std::vector<int> readLocale() const; Loading
native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp +9 −9 Original line number Diff line number Diff line Loading @@ -303,7 +303,7 @@ bool Ver4PatriciaTriePolicy::addUnigramEntry(const CodePointArrayView wordCodePo if (mUpdatingHelper.addUnigramWord(&readingHelper, codePointArrayView, unigramProperty, &addedNewUnigram)) { if (addedNewUnigram && !unigramProperty->representsBeginningOfSentence()) { mUnigramCount++; mEntryCounters.incrementUnigramCount(); } if (unigramProperty->getShortcuts().size() > 0) { // Add shortcut target. Loading Loading @@ -397,7 +397,7 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const NgramContext *const ngramContex if (mUpdatingHelper.addNgramEntry(PtNodePosArrayView::singleElementView(&prevWordPtNodePos), wordPos, ngramProperty, &addedNewBigram)) { if (addedNewBigram) { mBigramCount++; mEntryCounters.incrementBigramCount(); } return true; } else { Loading Loading @@ -438,7 +438,7 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const NgramContext *const ngramCon const int prevWordPtNodePos = getTerminalPtNodePosFromWordId(prevWordIds[0]); if (mUpdatingHelper.removeNgramEntry( PtNodePosArrayView::singleElementView(&prevWordPtNodePos), wordPos)) { mBigramCount--; mEntryCounters.decrementBigramCount(); return true; } else { return false; Loading Loading @@ -477,7 +477,7 @@ bool Ver4PatriciaTriePolicy::flush(const char *const filePath) { AKLOGI("Warning: flush() is called for non-updatable dictionary. filePath: %s", filePath); return false; } if (!mWritingHelper.writeToDictFile(filePath, mUnigramCount, mBigramCount)) { if (!mWritingHelper.writeToDictFile(filePath, mEntryCounters.getEntryCounts())) { AKLOGE("Cannot flush the dictionary to file."); mIsCorrupted = true; return false; Loading Loading @@ -515,7 +515,7 @@ bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const { // Needs to reduce dictionary size. return true; } else if (mHeaderPolicy->isDecayingDict()) { return ForgettingCurveUtils::needsToDecay(mindsBlockByGC, mUnigramCount, mBigramCount, return ForgettingCurveUtils::needsToDecay(mindsBlockByGC, mEntryCounters.getEntryCounts(), mHeaderPolicy); } return false; Loading @@ -525,19 +525,19 @@ void Ver4PatriciaTriePolicy::getProperty(const char *const query, const int quer char *const outResult, const int maxResultLength) { const int compareLength = queryLength + 1 /* terminator */; if (strncmp(query, UNIGRAM_COUNT_QUERY, compareLength) == 0) { snprintf(outResult, maxResultLength, "%d", mUnigramCount); snprintf(outResult, maxResultLength, "%d", mEntryCounters.getUnigramCount()); } else if (strncmp(query, BIGRAM_COUNT_QUERY, compareLength) == 0) { snprintf(outResult, maxResultLength, "%d", mBigramCount); snprintf(outResult, maxResultLength, "%d", mEntryCounters.getBigramCount()); } else if (strncmp(query, MAX_UNIGRAM_COUNT_QUERY, compareLength) == 0) { snprintf(outResult, maxResultLength, "%d", mHeaderPolicy->isDecayingDict() ? ForgettingCurveUtils::getUnigramCountHardLimit( ForgettingCurveUtils::getEntryCountHardLimit( mHeaderPolicy->getMaxUnigramCount()) : static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE)); } else if (strncmp(query, MAX_BIGRAM_COUNT_QUERY, compareLength) == 0) { snprintf(outResult, maxResultLength, "%d", mHeaderPolicy->isDecayingDict() ? ForgettingCurveUtils::getBigramCountHardLimit( ForgettingCurveUtils::getEntryCountHardLimit( mHeaderPolicy->getMaxBigramCount()) : static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE)); } Loading
native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h +4 −4 Original line number Diff line number Diff line Loading @@ -41,6 +41,7 @@ #include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.h" #include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_pt_node_array_reader.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" #include "suggest/policyimpl/dictionary/utils/entry_counters.h" #include "utils/int_array_view.h" namespace latinime { Loading Loading @@ -75,8 +76,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { &mPtNodeArrayReader, &mBigramPolicy, &mShortcutPolicy), mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter), mWritingHelper(mBuffers.get()), mUnigramCount(mHeaderPolicy->getUnigramCount()), mBigramCount(mHeaderPolicy->getBigramCount()), mEntryCounters(mHeaderPolicy->getUnigramCount(), mHeaderPolicy->getBigramCount(), mHeaderPolicy->getTrigramCount()), mTerminalPtNodePositionsForIteratingWords(), mIsCorrupted(false) {}; virtual int getRootPosition() const { Loading Loading @@ -163,8 +164,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { Ver4PatriciaTrieNodeWriter mNodeWriter; DynamicPtUpdatingHelper mUpdatingHelper; Ver4PatriciaTrieWritingHelper mWritingHelper; int mUnigramCount; int mBigramCount; MutableEntryCounters mEntryCounters; std::vector<int> mTerminalPtNodePositionsForIteratingWords; mutable bool mIsCorrupted; Loading
native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.cpp +6 −5 Original line number Diff line number Diff line Loading @@ -43,18 +43,18 @@ namespace backward { namespace v402 { bool Ver4PatriciaTrieWritingHelper::writeToDictFile(const char *const dictDirPath, const int unigramCount, const int bigramCount) const { const EntryCounts &entryCounts) const { const HeaderPolicy *const headerPolicy = mBuffers->getHeaderPolicy(); BufferWithExtendableBuffer headerBuffer( BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE); const int extendedRegionSize = headerPolicy->getExtendedRegionSize() + mBuffers->getTrieBuffer()->getUsedAdditionalBufferSize(); if (!headerPolicy->fillInAndWriteHeaderToBuffer(false /* updatesLastDecayedTime */, unigramCount, bigramCount, extendedRegionSize, &headerBuffer)) { entryCounts, extendedRegionSize, &headerBuffer)) { AKLOGE("Cannot write header structure to buffer. " "updatesLastDecayedTime: %d, unigramCount: %d, bigramCount: %d, " "extendedRegionSize: %d", false, unigramCount, bigramCount, extendedRegionSize); "extendedRegionSize: %d", false, entryCounters.getUnigramCount(), entryCounters.getBigramCount(), extendedRegionSize); return false; } return mBuffers->flushHeaderAndDictBuffers(dictDirPath, &headerBuffer); Loading @@ -74,7 +74,8 @@ bool Ver4PatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeAr BufferWithExtendableBuffer headerBuffer( BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE); if (!headerPolicy->fillInAndWriteHeaderToBuffer(true /* updatesLastDecayedTime */, unigramCount, bigramCount, 0 /* extendedRegionSize */, &headerBuffer)) { EntryCounts(unigramCount, bigramCount, 0 /* trigramCount */), 0 /* extendedRegionSize */, &headerBuffer)) { return false; } return dictBuffers->flushHeaderAndDictBuffers(dictDirPath, &headerBuffer); Loading