Loading native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h +15 −4 Original line number Diff line number Diff line Loading @@ -65,7 +65,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { mMaxUnigramCount(HeaderReadWriteUtils::readIntAttributeValue( &mAttributeMap, MAX_UNIGRAM_COUNT_KEY, DEFAULT_MAX_UNIGRAM_COUNT)), mMaxBigramCount(HeaderReadWriteUtils::readIntAttributeValue( &mAttributeMap, MAX_BIGRAM_COUNT_KEY, DEFAULT_MAX_BIGRAM_COUNT)) {} &mAttributeMap, MAX_BIGRAM_COUNT_KEY, DEFAULT_MAX_BIGRAM_COUNT)), mCodePointTable(HeaderReadWriteUtils::readCodePointTable(&mAttributeMap)) {} // Constructs header information using an attribute map. HeaderPolicy(const FormatUtils::FORMAT_VERSION dictFormatVersion, Loading Loading @@ -97,7 +98,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { mMaxUnigramCount(HeaderReadWriteUtils::readIntAttributeValue( &mAttributeMap, MAX_UNIGRAM_COUNT_KEY, DEFAULT_MAX_UNIGRAM_COUNT)), mMaxBigramCount(HeaderReadWriteUtils::readIntAttributeValue( &mAttributeMap, MAX_BIGRAM_COUNT_KEY, DEFAULT_MAX_BIGRAM_COUNT)) {} &mAttributeMap, MAX_BIGRAM_COUNT_KEY, DEFAULT_MAX_BIGRAM_COUNT)), mCodePointTable(HeaderReadWriteUtils::readCodePointTable(&mAttributeMap)) {} // Copy header information HeaderPolicy(const HeaderPolicy *const headerPolicy) Loading @@ -118,7 +120,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { mForgettingCurveDurationToLevelDown( headerPolicy->mForgettingCurveDurationToLevelDown), mMaxUnigramCount(headerPolicy->mMaxUnigramCount), mMaxBigramCount(headerPolicy->mMaxBigramCount) {} mMaxBigramCount(headerPolicy->mMaxBigramCount), mCodePointTable(headerPolicy->mCodePointTable) {} // Temporary dummy header. HeaderPolicy() Loading @@ -128,7 +131,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { mDate(0), mLastDecayedTime(0), mUnigramCount(0), mBigramCount(0), mExtendedRegionSize(0), mHasHistoricalInfoOfWords(false), mForgettingCurveOccurrencesToLevelUp(0), mForgettingCurveProbabilityValuesTableId(0), mForgettingCurveDurationToLevelDown(0), mMaxUnigramCount(0), mMaxBigramCount(0) {} mForgettingCurveDurationToLevelDown(0), mMaxUnigramCount(0), mMaxBigramCount(0), mCodePointTable(nullptr) {} ~HeaderPolicy() {} Loading @@ -139,6 +143,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { switch (mDictFormatVersion) { case FormatUtils::VERSION_2: return FormatUtils::VERSION_2; case FormatUtils::VERSION_201: return FormatUtils::VERSION_201; case FormatUtils::VERSION_4_ONLY_FOR_TESTING: return FormatUtils::VERSION_4_ONLY_FOR_TESTING; case FormatUtils::VERSION_4: Loading Loading @@ -250,6 +256,10 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { return mDictFormatVersion >= FormatUtils::VERSION_4; } const int *getCodePointTable() const { return mCodePointTable; } private: DISALLOW_COPY_AND_ASSIGN(HeaderPolicy); Loading Loading @@ -295,6 +305,7 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { const int mForgettingCurveDurationToLevelDown; const int mMaxUnigramCount; const int mMaxBigramCount; const int *const mCodePointTable; const std::vector<int> readLocale() const; float readMultipleWordCostMultiplier() const; Loading native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp +21 −6 Original line number Diff line number Diff line Loading @@ -18,6 +18,7 @@ #include <cctype> #include <cstdio> #include <memory> #include <vector> #include "defines.h" Loading @@ -34,12 +35,13 @@ namespace latinime { const int HeaderReadWriteUtils::LARGEST_INT_DIGIT_COUNT = 11; const int HeaderReadWriteUtils::MAX_ATTRIBUTE_KEY_LENGTH = 256; const int HeaderReadWriteUtils::MAX_ATTRIBUTE_VALUE_LENGTH = 256; const int HeaderReadWriteUtils::MAX_ATTRIBUTE_VALUE_LENGTH = 2048; const int HeaderReadWriteUtils::HEADER_MAGIC_NUMBER_SIZE = 4; const int HeaderReadWriteUtils::HEADER_DICTIONARY_VERSION_SIZE = 2; const int HeaderReadWriteUtils::HEADER_FLAG_SIZE = 2; const int HeaderReadWriteUtils::HEADER_SIZE_FIELD_SIZE = 4; const char *const HeaderReadWriteUtils::CODE_POINT_TABLE_KEY = "codePointTable"; const HeaderReadWriteUtils::DictionaryFlags HeaderReadWriteUtils::NO_FLAGS = 0; Loading Loading @@ -73,20 +75,32 @@ typedef DictionaryHeaderStructurePolicy::AttributeMap AttributeMap; return; } int keyBuffer[MAX_ATTRIBUTE_KEY_LENGTH]; int valueBuffer[MAX_ATTRIBUTE_VALUE_LENGTH]; std::unique_ptr<int[]> valueBuffer(new int[MAX_ATTRIBUTE_VALUE_LENGTH]); while (pos < headerSize) { // The values in the header don't use the code point table for their encoding. const int keyLength = ByteArrayUtils::readStringAndAdvancePosition(dictBuf, MAX_ATTRIBUTE_KEY_LENGTH, keyBuffer, &pos); MAX_ATTRIBUTE_KEY_LENGTH, nullptr /* codePointTable */, keyBuffer, &pos); std::vector<int> key; key.insert(key.end(), keyBuffer, keyBuffer + keyLength); const int valueLength = ByteArrayUtils::readStringAndAdvancePosition(dictBuf, MAX_ATTRIBUTE_VALUE_LENGTH, valueBuffer, &pos); MAX_ATTRIBUTE_VALUE_LENGTH, nullptr /* codePointTable */, valueBuffer.get(), &pos); std::vector<int> value; value.insert(value.end(), valueBuffer, valueBuffer + valueLength); value.insert(value.end(), valueBuffer.get(), valueBuffer.get() + valueLength); headerAttributes->insert(AttributeMap::value_type(key, value)); } } /* static */ const int *HeaderReadWriteUtils::readCodePointTable( AttributeMap *const headerAttributes) { AttributeMap::key_type keyVector; insertCharactersIntoVector(CODE_POINT_TABLE_KEY, &keyVector); AttributeMap::const_iterator it = headerAttributes->find(keyVector); if (it == headerAttributes->end()) { return nullptr; } return it->second.data(); } /* static */ bool HeaderReadWriteUtils::writeDictionaryVersion( BufferWithExtendableBuffer *const buffer, const FormatUtils::FORMAT_VERSION version, int *const writingPos) { Loading @@ -96,7 +110,8 @@ typedef DictionaryHeaderStructurePolicy::AttributeMap AttributeMap; } switch (version) { case FormatUtils::VERSION_2: // Version 2 dictionary writing is not supported. case FormatUtils::VERSION_201: // Version 2 or 201 dictionary writing is not supported. return false; case FormatUtils::VERSION_4_ONLY_FOR_TESTING: case FormatUtils::VERSION_4: Loading native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h +5 −0 Original line number Diff line number Diff line Loading @@ -46,6 +46,9 @@ class HeaderReadWriteUtils { static void fetchAllHeaderAttributes(const uint8_t *const dictBuf, DictionaryHeaderStructurePolicy::AttributeMap *const headerAttributes); static const int *readCodePointTable( DictionaryHeaderStructurePolicy::AttributeMap *const headerAttributes); static bool writeDictionaryVersion(BufferWithExtendableBuffer *const buffer, const FormatUtils::FORMAT_VERSION version, int *const writingPos); Loading Loading @@ -101,6 +104,8 @@ class HeaderReadWriteUtils { static const int HEADER_FLAG_SIZE; static const int HEADER_SIZE_FIELD_SIZE; static const char *const CODE_POINT_TABLE_KEY; // Value for the "flags" field. It's unused at the moment. static const DictionaryFlags NO_FLAGS; Loading native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.cpp +4 −3 Original line number Diff line number Diff line Loading @@ -23,6 +23,7 @@ #include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.h" #include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h" #include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.h" Loading Loading @@ -59,8 +60,8 @@ const PtNodeParams Ver4PatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProce const int parentPos = DynamicPtReadingUtils::getParentPtNodePos(parentPosOffset, headPos); int codePoints[MAX_WORD_LENGTH]; const int codePonitCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition( dictBuf, flags, MAX_WORD_LENGTH, codePoints, &pos); const int codePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition( dictBuf, flags, MAX_WORD_LENGTH, mHeaderPolicy->getCodePointTable(), codePoints, &pos); int terminalIdFieldPos = NOT_A_DICT_POS; int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; int probability = NOT_A_PROBABILITY; Loading Loading @@ -98,7 +99,7 @@ const PtNodeParams Ver4PatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProce // The destination position is stored at the same place as the parent position. return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(parentPos, newSiblingNodePos); } else { return PtNodeParams(headPos, flags, parentPos, codePonitCount, codePoints, return PtNodeParams(headPos, flags, parentPos, codePointCount, codePoints, terminalIdFieldPos, terminalId, probability, childrenPosFieldPos, childrenPos, newSiblingNodePos); } Loading native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp +3 −1 Original line number Diff line number Diff line Loading @@ -114,7 +114,8 @@ template<class DictConstants, class DictBuffers, class DictBuffersPtr, class Str mmappedBuffer->getReadOnlyByteArrayView()); switch (formatVersion) { case FormatUtils::VERSION_2: AKLOGE("Given path is a directory but the format is version 2. path: %s", path); case FormatUtils::VERSION_201: AKLOGE("Given path is a directory but the format is version 2 or 201. path: %s", path); break; case FormatUtils::VERSION_4: { return newPolicyForV4Dict<backward::v402::Ver4DictConstants, Loading Loading @@ -175,6 +176,7 @@ template<class DictConstants, class DictBuffers, class DictBuffersPtr, class Str } switch (FormatUtils::detectFormatVersion(mmappedBuffer->getReadOnlyByteArrayView())) { case FormatUtils::VERSION_2: case FormatUtils::VERSION_201: return DictionaryStructureWithBufferPolicy::StructurePolicyPtr( new PatriciaTriePolicy(std::move(mmappedBuffer))); case FormatUtils::VERSION_4_ONLY_FOR_TESTING: Loading Loading
native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h +15 −4 Original line number Diff line number Diff line Loading @@ -65,7 +65,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { mMaxUnigramCount(HeaderReadWriteUtils::readIntAttributeValue( &mAttributeMap, MAX_UNIGRAM_COUNT_KEY, DEFAULT_MAX_UNIGRAM_COUNT)), mMaxBigramCount(HeaderReadWriteUtils::readIntAttributeValue( &mAttributeMap, MAX_BIGRAM_COUNT_KEY, DEFAULT_MAX_BIGRAM_COUNT)) {} &mAttributeMap, MAX_BIGRAM_COUNT_KEY, DEFAULT_MAX_BIGRAM_COUNT)), mCodePointTable(HeaderReadWriteUtils::readCodePointTable(&mAttributeMap)) {} // Constructs header information using an attribute map. HeaderPolicy(const FormatUtils::FORMAT_VERSION dictFormatVersion, Loading Loading @@ -97,7 +98,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { mMaxUnigramCount(HeaderReadWriteUtils::readIntAttributeValue( &mAttributeMap, MAX_UNIGRAM_COUNT_KEY, DEFAULT_MAX_UNIGRAM_COUNT)), mMaxBigramCount(HeaderReadWriteUtils::readIntAttributeValue( &mAttributeMap, MAX_BIGRAM_COUNT_KEY, DEFAULT_MAX_BIGRAM_COUNT)) {} &mAttributeMap, MAX_BIGRAM_COUNT_KEY, DEFAULT_MAX_BIGRAM_COUNT)), mCodePointTable(HeaderReadWriteUtils::readCodePointTable(&mAttributeMap)) {} // Copy header information HeaderPolicy(const HeaderPolicy *const headerPolicy) Loading @@ -118,7 +120,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { mForgettingCurveDurationToLevelDown( headerPolicy->mForgettingCurveDurationToLevelDown), mMaxUnigramCount(headerPolicy->mMaxUnigramCount), mMaxBigramCount(headerPolicy->mMaxBigramCount) {} mMaxBigramCount(headerPolicy->mMaxBigramCount), mCodePointTable(headerPolicy->mCodePointTable) {} // Temporary dummy header. HeaderPolicy() Loading @@ -128,7 +131,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { mDate(0), mLastDecayedTime(0), mUnigramCount(0), mBigramCount(0), mExtendedRegionSize(0), mHasHistoricalInfoOfWords(false), mForgettingCurveOccurrencesToLevelUp(0), mForgettingCurveProbabilityValuesTableId(0), mForgettingCurveDurationToLevelDown(0), mMaxUnigramCount(0), mMaxBigramCount(0) {} mForgettingCurveDurationToLevelDown(0), mMaxUnigramCount(0), mMaxBigramCount(0), mCodePointTable(nullptr) {} ~HeaderPolicy() {} Loading @@ -139,6 +143,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { switch (mDictFormatVersion) { case FormatUtils::VERSION_2: return FormatUtils::VERSION_2; case FormatUtils::VERSION_201: return FormatUtils::VERSION_201; case FormatUtils::VERSION_4_ONLY_FOR_TESTING: return FormatUtils::VERSION_4_ONLY_FOR_TESTING; case FormatUtils::VERSION_4: Loading Loading @@ -250,6 +256,10 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { return mDictFormatVersion >= FormatUtils::VERSION_4; } const int *getCodePointTable() const { return mCodePointTable; } private: DISALLOW_COPY_AND_ASSIGN(HeaderPolicy); Loading Loading @@ -295,6 +305,7 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { const int mForgettingCurveDurationToLevelDown; const int mMaxUnigramCount; const int mMaxBigramCount; const int *const mCodePointTable; const std::vector<int> readLocale() const; float readMultipleWordCostMultiplier() const; Loading
native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp +21 −6 Original line number Diff line number Diff line Loading @@ -18,6 +18,7 @@ #include <cctype> #include <cstdio> #include <memory> #include <vector> #include "defines.h" Loading @@ -34,12 +35,13 @@ namespace latinime { const int HeaderReadWriteUtils::LARGEST_INT_DIGIT_COUNT = 11; const int HeaderReadWriteUtils::MAX_ATTRIBUTE_KEY_LENGTH = 256; const int HeaderReadWriteUtils::MAX_ATTRIBUTE_VALUE_LENGTH = 256; const int HeaderReadWriteUtils::MAX_ATTRIBUTE_VALUE_LENGTH = 2048; const int HeaderReadWriteUtils::HEADER_MAGIC_NUMBER_SIZE = 4; const int HeaderReadWriteUtils::HEADER_DICTIONARY_VERSION_SIZE = 2; const int HeaderReadWriteUtils::HEADER_FLAG_SIZE = 2; const int HeaderReadWriteUtils::HEADER_SIZE_FIELD_SIZE = 4; const char *const HeaderReadWriteUtils::CODE_POINT_TABLE_KEY = "codePointTable"; const HeaderReadWriteUtils::DictionaryFlags HeaderReadWriteUtils::NO_FLAGS = 0; Loading Loading @@ -73,20 +75,32 @@ typedef DictionaryHeaderStructurePolicy::AttributeMap AttributeMap; return; } int keyBuffer[MAX_ATTRIBUTE_KEY_LENGTH]; int valueBuffer[MAX_ATTRIBUTE_VALUE_LENGTH]; std::unique_ptr<int[]> valueBuffer(new int[MAX_ATTRIBUTE_VALUE_LENGTH]); while (pos < headerSize) { // The values in the header don't use the code point table for their encoding. const int keyLength = ByteArrayUtils::readStringAndAdvancePosition(dictBuf, MAX_ATTRIBUTE_KEY_LENGTH, keyBuffer, &pos); MAX_ATTRIBUTE_KEY_LENGTH, nullptr /* codePointTable */, keyBuffer, &pos); std::vector<int> key; key.insert(key.end(), keyBuffer, keyBuffer + keyLength); const int valueLength = ByteArrayUtils::readStringAndAdvancePosition(dictBuf, MAX_ATTRIBUTE_VALUE_LENGTH, valueBuffer, &pos); MAX_ATTRIBUTE_VALUE_LENGTH, nullptr /* codePointTable */, valueBuffer.get(), &pos); std::vector<int> value; value.insert(value.end(), valueBuffer, valueBuffer + valueLength); value.insert(value.end(), valueBuffer.get(), valueBuffer.get() + valueLength); headerAttributes->insert(AttributeMap::value_type(key, value)); } } /* static */ const int *HeaderReadWriteUtils::readCodePointTable( AttributeMap *const headerAttributes) { AttributeMap::key_type keyVector; insertCharactersIntoVector(CODE_POINT_TABLE_KEY, &keyVector); AttributeMap::const_iterator it = headerAttributes->find(keyVector); if (it == headerAttributes->end()) { return nullptr; } return it->second.data(); } /* static */ bool HeaderReadWriteUtils::writeDictionaryVersion( BufferWithExtendableBuffer *const buffer, const FormatUtils::FORMAT_VERSION version, int *const writingPos) { Loading @@ -96,7 +110,8 @@ typedef DictionaryHeaderStructurePolicy::AttributeMap AttributeMap; } switch (version) { case FormatUtils::VERSION_2: // Version 2 dictionary writing is not supported. case FormatUtils::VERSION_201: // Version 2 or 201 dictionary writing is not supported. return false; case FormatUtils::VERSION_4_ONLY_FOR_TESTING: case FormatUtils::VERSION_4: Loading
native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h +5 −0 Original line number Diff line number Diff line Loading @@ -46,6 +46,9 @@ class HeaderReadWriteUtils { static void fetchAllHeaderAttributes(const uint8_t *const dictBuf, DictionaryHeaderStructurePolicy::AttributeMap *const headerAttributes); static const int *readCodePointTable( DictionaryHeaderStructurePolicy::AttributeMap *const headerAttributes); static bool writeDictionaryVersion(BufferWithExtendableBuffer *const buffer, const FormatUtils::FORMAT_VERSION version, int *const writingPos); Loading Loading @@ -101,6 +104,8 @@ class HeaderReadWriteUtils { static const int HEADER_FLAG_SIZE; static const int HEADER_SIZE_FIELD_SIZE; static const char *const CODE_POINT_TABLE_KEY; // Value for the "flags" field. It's unused at the moment. static const DictionaryFlags NO_FLAGS; Loading
native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.cpp +4 −3 Original line number Diff line number Diff line Loading @@ -23,6 +23,7 @@ #include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.h" #include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h" #include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.h" Loading Loading @@ -59,8 +60,8 @@ const PtNodeParams Ver4PatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProce const int parentPos = DynamicPtReadingUtils::getParentPtNodePos(parentPosOffset, headPos); int codePoints[MAX_WORD_LENGTH]; const int codePonitCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition( dictBuf, flags, MAX_WORD_LENGTH, codePoints, &pos); const int codePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition( dictBuf, flags, MAX_WORD_LENGTH, mHeaderPolicy->getCodePointTable(), codePoints, &pos); int terminalIdFieldPos = NOT_A_DICT_POS; int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID; int probability = NOT_A_PROBABILITY; Loading Loading @@ -98,7 +99,7 @@ const PtNodeParams Ver4PatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProce // The destination position is stored at the same place as the parent position. return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(parentPos, newSiblingNodePos); } else { return PtNodeParams(headPos, flags, parentPos, codePonitCount, codePoints, return PtNodeParams(headPos, flags, parentPos, codePointCount, codePoints, terminalIdFieldPos, terminalId, probability, childrenPosFieldPos, childrenPos, newSiblingNodePos); } Loading
native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp +3 −1 Original line number Diff line number Diff line Loading @@ -114,7 +114,8 @@ template<class DictConstants, class DictBuffers, class DictBuffersPtr, class Str mmappedBuffer->getReadOnlyByteArrayView()); switch (formatVersion) { case FormatUtils::VERSION_2: AKLOGE("Given path is a directory but the format is version 2. path: %s", path); case FormatUtils::VERSION_201: AKLOGE("Given path is a directory but the format is version 2 or 201. path: %s", path); break; case FormatUtils::VERSION_4: { return newPolicyForV4Dict<backward::v402::Ver4DictConstants, Loading Loading @@ -175,6 +176,7 @@ template<class DictConstants, class DictBuffers, class DictBuffersPtr, class Str } switch (FormatUtils::detectFormatVersion(mmappedBuffer->getReadOnlyByteArrayView())) { case FormatUtils::VERSION_2: case FormatUtils::VERSION_201: return DictionaryStructureWithBufferPolicy::StructurePolicyPtr( new PatriciaTriePolicy(std::move(mmappedBuffer))); case FormatUtils::VERSION_4_ONLY_FOR_TESTING: Loading