Loading native/jni/src/suggest/core/dicnode/dic_node_utils.cpp +2 −2 Original line number Diff line number Diff line Loading @@ -118,8 +118,8 @@ namespace latinime { return ProbabilityUtils::backoff(unigramProbability); } if (multiBigramMap) { return multiBigramMap->getBigramProbability( binaryDictionaryInfo, prevWordPos, wordPos, unigramProbability); return multiBigramMap->getBigramProbability(binaryDictionaryInfo->getStructurePolicy(), prevWordPos, wordPos, unigramProbability); } return ProbabilityUtils::backoff(unigramProbability); } Loading native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp +14 −8 Original line number Diff line number Diff line Loading @@ -112,10 +112,12 @@ int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLeng int bigramCount = 0; int unigramProbability = 0; int bigramBuffer[MAX_WORD_LENGTH]; BinaryDictionaryBigramsIterator bigramsIt(mBinaryDictionaryInfo, pos); const DictionaryStructureWithBufferPolicy *const structurePolicy = mBinaryDictionaryInfo->getStructurePolicy(); BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(), pos); while (bigramsIt.hasNext()) { bigramsIt.next(); const int length = mBinaryDictionaryInfo->getStructurePolicy()-> const int length = structurePolicy-> getCodePointsAndProbabilityAndReturnCodePointCount(bigramsIt.getBigramPos(), MAX_WORD_LENGTH, bigramBuffer, &unigramProbability); // Due to space constraints, the probability for bigrams is approximate - the lower the Loading @@ -137,10 +139,12 @@ int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLeng int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const int prevWordLength, const bool forceLowerCaseSearch) const { if (0 >= prevWordLength) return NOT_A_DICT_POS; int pos = mBinaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord( prevWord, prevWordLength, forceLowerCaseSearch); const DictionaryStructureWithBufferPolicy *const structurePolicy = mBinaryDictionaryInfo->getStructurePolicy(); int pos = structurePolicy->getTerminalNodePositionOfWord(prevWord, prevWordLength, forceLowerCaseSearch); if (NOT_A_VALID_WORD_POS == pos) return NOT_A_DICT_POS; return mBinaryDictionaryInfo->getStructurePolicy()->getBigramsPositionOfNode(pos); return structurePolicy->getBigramsPositionOfNode(pos); } bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *word1, Loading @@ -148,11 +152,13 @@ bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *w int pos = getBigramListPositionForWord(word0, length0, false /* forceLowerCaseSearch */); // getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams if (NOT_A_DICT_POS == pos) return false; int nextWordPos = mBinaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord( word1, length1, false /* forceLowerCaseSearch */); const DictionaryStructureWithBufferPolicy *const structurePolicy = mBinaryDictionaryInfo->getStructurePolicy(); int nextWordPos = structurePolicy->getTerminalNodePositionOfWord(word1, length1, false /* forceLowerCaseSearch */); if (NOT_A_VALID_WORD_POS == nextWordPos) return false; BinaryDictionaryBigramsIterator bigramsIt(mBinaryDictionaryInfo, pos); BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(), pos); while (bigramsIt.hasNext()) { bigramsIt.next(); if (bigramsIt.getBigramPos() == nextWordPos) { Loading native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h +9 −19 Original line number Diff line number Diff line Loading @@ -18,51 +18,41 @@ #define LATINIME_BINARY_DICTIONARY_BIGRAMS_ITERATOR_H #include "defines.h" #include "suggest/core/dictionary/binary_dictionary_info.h" #include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h" #include "suggest/core/policy/dictionary_bigrams_structure_policy.h" namespace latinime { class BinaryDictionaryBigramsIterator { public: BinaryDictionaryBigramsIterator( const BinaryDictionaryInfo *const binaryDictionaryInfo, const int pos) : mBinaryDictionaryInfo(binaryDictionaryInfo), mPos(pos), mBigramFlags(0), mBigramPos(NOT_A_DICT_POS), mHasNext(pos != NOT_A_DICT_POS) {} const DictionaryBigramsStructurePolicy *const bigramsStructurePolicy, const int pos) : mBigramsStructurePolicy(bigramsStructurePolicy), mPos(pos), mBigramPos(NOT_A_DICT_POS), mProbability(NOT_A_PROBABILITY), mHasNext(pos != NOT_A_DICT_POS) {} AK_FORCE_INLINE bool hasNext() const { return mHasNext; } AK_FORCE_INLINE void next() { mBigramFlags = BinaryDictionaryTerminalAttributesReadingUtils::getFlagsAndForwardPointer( mBinaryDictionaryInfo, &mPos); mBigramPos = BinaryDictionaryTerminalAttributesReadingUtils::getBigramAddressAndForwardPointer( mBinaryDictionaryInfo, mBigramFlags, &mPos); mHasNext = BinaryDictionaryTerminalAttributesReadingUtils::hasNext(mBigramFlags); mBigramsStructurePolicy->getNextBigram(&mBigramPos, &mProbability, &mHasNext, &mPos); } AK_FORCE_INLINE int getProbability() const { return BinaryDictionaryTerminalAttributesReadingUtils::getProbabilityFromFlags( mBigramFlags); return mProbability; } AK_FORCE_INLINE int getBigramPos() const { return mBigramPos; } AK_FORCE_INLINE int getFlags() const { return mBigramFlags; } private: DISALLOW_COPY_AND_ASSIGN(BinaryDictionaryBigramsIterator); const BinaryDictionaryInfo *const mBinaryDictionaryInfo; const DictionaryBigramsStructurePolicy *const mBigramsStructurePolicy; int mPos; BinaryDictionaryTerminalAttributesReadingUtils::BigramFlags mBigramFlags; int mBigramPos; int mProbability; bool mHasNext; }; } // namespace latinime Loading native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.cpp +4 −8 Original line number Diff line number Diff line Loading @@ -16,7 +16,6 @@ #include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h" #include "suggest/core/dictionary/binary_dictionary_info.h" #include "suggest/core/dictionary/byte_array_utils.h" namespace latinime { Loading @@ -38,22 +37,19 @@ const int TaUtils::SHORTCUT_LIST_SIZE_FIELD_SIZE = 2; const int TaUtils::WHITELIST_SHORTCUT_PROBABILITY = 15; /* static */ int TaUtils::getBigramAddressAndForwardPointer( const BinaryDictionaryInfo *const binaryDictionaryInfo, const TerminalAttributeFlags flags, const uint8_t *const dictRoot, const TerminalAttributeFlags flags, int *const pos) { int offset = 0; const int origin = *pos; switch (MASK_ATTRIBUTE_ADDRESS_TYPE & flags) { case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE: offset = ByteArrayUtils::readUint8AndAdvancePosition( binaryDictionaryInfo->getDictRoot(), pos); offset = ByteArrayUtils::readUint8AndAdvancePosition(dictRoot, pos); break; case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES: offset = ByteArrayUtils::readUint16AndAdvancePosition( binaryDictionaryInfo->getDictRoot(), pos); offset = ByteArrayUtils::readUint16AndAdvancePosition(dictRoot, pos); break; case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES: offset = ByteArrayUtils::readUint24AndAdvancePosition( binaryDictionaryInfo->getDictRoot(), pos); offset = ByteArrayUtils::readUint24AndAdvancePosition(dictRoot, pos); break; } if (isOffsetNegative(flags)) { Loading native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h +6 −8 Original line number Diff line number Diff line Loading @@ -32,9 +32,8 @@ class BinaryDictionaryTerminalAttributesReadingUtils { typedef TerminalAttributeFlags ShortcutFlags; static AK_FORCE_INLINE TerminalAttributeFlags getFlagsAndForwardPointer( const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) { return ByteArrayUtils::readUint8AndAdvancePosition( binaryDictionaryInfo->getDictRoot(), pos); const uint8_t *const dictRoot, int *const pos) { return ByteArrayUtils::readUint8AndAdvancePosition(dictRoot, pos); } static AK_FORCE_INLINE int getProbabilityFromFlags(const TerminalAttributeFlags flags) { Loading @@ -47,18 +46,17 @@ class BinaryDictionaryTerminalAttributesReadingUtils { // Bigrams reading methods static AK_FORCE_INLINE void skipExistingBigrams( const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) { BigramFlags flags = getFlagsAndForwardPointer(binaryDictionaryInfo, pos); const uint8_t *const dictRoot, int *const pos) { BigramFlags flags = getFlagsAndForwardPointer(dictRoot, pos); while (hasNext(flags)) { *pos += attributeAddressSize(flags); flags = getFlagsAndForwardPointer(binaryDictionaryInfo, pos); flags = getFlagsAndForwardPointer(dictRoot, pos); } *pos += attributeAddressSize(flags); } static int getBigramAddressAndForwardPointer( const BinaryDictionaryInfo *const binaryDictionaryInfo, const BigramFlags flags, int *const pos); const uint8_t *const dictRoot, const BigramFlags flags, int *const pos); // Shortcuts reading methods // This method returns the size of the shortcut list region excluding the shortcut list size Loading Loading
native/jni/src/suggest/core/dicnode/dic_node_utils.cpp +2 −2 Original line number Diff line number Diff line Loading @@ -118,8 +118,8 @@ namespace latinime { return ProbabilityUtils::backoff(unigramProbability); } if (multiBigramMap) { return multiBigramMap->getBigramProbability( binaryDictionaryInfo, prevWordPos, wordPos, unigramProbability); return multiBigramMap->getBigramProbability(binaryDictionaryInfo->getStructurePolicy(), prevWordPos, wordPos, unigramProbability); } return ProbabilityUtils::backoff(unigramProbability); } Loading
native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp +14 −8 Original line number Diff line number Diff line Loading @@ -112,10 +112,12 @@ int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLeng int bigramCount = 0; int unigramProbability = 0; int bigramBuffer[MAX_WORD_LENGTH]; BinaryDictionaryBigramsIterator bigramsIt(mBinaryDictionaryInfo, pos); const DictionaryStructureWithBufferPolicy *const structurePolicy = mBinaryDictionaryInfo->getStructurePolicy(); BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(), pos); while (bigramsIt.hasNext()) { bigramsIt.next(); const int length = mBinaryDictionaryInfo->getStructurePolicy()-> const int length = structurePolicy-> getCodePointsAndProbabilityAndReturnCodePointCount(bigramsIt.getBigramPos(), MAX_WORD_LENGTH, bigramBuffer, &unigramProbability); // Due to space constraints, the probability for bigrams is approximate - the lower the Loading @@ -137,10 +139,12 @@ int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLeng int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const int prevWordLength, const bool forceLowerCaseSearch) const { if (0 >= prevWordLength) return NOT_A_DICT_POS; int pos = mBinaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord( prevWord, prevWordLength, forceLowerCaseSearch); const DictionaryStructureWithBufferPolicy *const structurePolicy = mBinaryDictionaryInfo->getStructurePolicy(); int pos = structurePolicy->getTerminalNodePositionOfWord(prevWord, prevWordLength, forceLowerCaseSearch); if (NOT_A_VALID_WORD_POS == pos) return NOT_A_DICT_POS; return mBinaryDictionaryInfo->getStructurePolicy()->getBigramsPositionOfNode(pos); return structurePolicy->getBigramsPositionOfNode(pos); } bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *word1, Loading @@ -148,11 +152,13 @@ bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *w int pos = getBigramListPositionForWord(word0, length0, false /* forceLowerCaseSearch */); // getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams if (NOT_A_DICT_POS == pos) return false; int nextWordPos = mBinaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord( word1, length1, false /* forceLowerCaseSearch */); const DictionaryStructureWithBufferPolicy *const structurePolicy = mBinaryDictionaryInfo->getStructurePolicy(); int nextWordPos = structurePolicy->getTerminalNodePositionOfWord(word1, length1, false /* forceLowerCaseSearch */); if (NOT_A_VALID_WORD_POS == nextWordPos) return false; BinaryDictionaryBigramsIterator bigramsIt(mBinaryDictionaryInfo, pos); BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(), pos); while (bigramsIt.hasNext()) { bigramsIt.next(); if (bigramsIt.getBigramPos() == nextWordPos) { Loading
native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h +9 −19 Original line number Diff line number Diff line Loading @@ -18,51 +18,41 @@ #define LATINIME_BINARY_DICTIONARY_BIGRAMS_ITERATOR_H #include "defines.h" #include "suggest/core/dictionary/binary_dictionary_info.h" #include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h" #include "suggest/core/policy/dictionary_bigrams_structure_policy.h" namespace latinime { class BinaryDictionaryBigramsIterator { public: BinaryDictionaryBigramsIterator( const BinaryDictionaryInfo *const binaryDictionaryInfo, const int pos) : mBinaryDictionaryInfo(binaryDictionaryInfo), mPos(pos), mBigramFlags(0), mBigramPos(NOT_A_DICT_POS), mHasNext(pos != NOT_A_DICT_POS) {} const DictionaryBigramsStructurePolicy *const bigramsStructurePolicy, const int pos) : mBigramsStructurePolicy(bigramsStructurePolicy), mPos(pos), mBigramPos(NOT_A_DICT_POS), mProbability(NOT_A_PROBABILITY), mHasNext(pos != NOT_A_DICT_POS) {} AK_FORCE_INLINE bool hasNext() const { return mHasNext; } AK_FORCE_INLINE void next() { mBigramFlags = BinaryDictionaryTerminalAttributesReadingUtils::getFlagsAndForwardPointer( mBinaryDictionaryInfo, &mPos); mBigramPos = BinaryDictionaryTerminalAttributesReadingUtils::getBigramAddressAndForwardPointer( mBinaryDictionaryInfo, mBigramFlags, &mPos); mHasNext = BinaryDictionaryTerminalAttributesReadingUtils::hasNext(mBigramFlags); mBigramsStructurePolicy->getNextBigram(&mBigramPos, &mProbability, &mHasNext, &mPos); } AK_FORCE_INLINE int getProbability() const { return BinaryDictionaryTerminalAttributesReadingUtils::getProbabilityFromFlags( mBigramFlags); return mProbability; } AK_FORCE_INLINE int getBigramPos() const { return mBigramPos; } AK_FORCE_INLINE int getFlags() const { return mBigramFlags; } private: DISALLOW_COPY_AND_ASSIGN(BinaryDictionaryBigramsIterator); const BinaryDictionaryInfo *const mBinaryDictionaryInfo; const DictionaryBigramsStructurePolicy *const mBigramsStructurePolicy; int mPos; BinaryDictionaryTerminalAttributesReadingUtils::BigramFlags mBigramFlags; int mBigramPos; int mProbability; bool mHasNext; }; } // namespace latinime Loading
native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.cpp +4 −8 Original line number Diff line number Diff line Loading @@ -16,7 +16,6 @@ #include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h" #include "suggest/core/dictionary/binary_dictionary_info.h" #include "suggest/core/dictionary/byte_array_utils.h" namespace latinime { Loading @@ -38,22 +37,19 @@ const int TaUtils::SHORTCUT_LIST_SIZE_FIELD_SIZE = 2; const int TaUtils::WHITELIST_SHORTCUT_PROBABILITY = 15; /* static */ int TaUtils::getBigramAddressAndForwardPointer( const BinaryDictionaryInfo *const binaryDictionaryInfo, const TerminalAttributeFlags flags, const uint8_t *const dictRoot, const TerminalAttributeFlags flags, int *const pos) { int offset = 0; const int origin = *pos; switch (MASK_ATTRIBUTE_ADDRESS_TYPE & flags) { case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE: offset = ByteArrayUtils::readUint8AndAdvancePosition( binaryDictionaryInfo->getDictRoot(), pos); offset = ByteArrayUtils::readUint8AndAdvancePosition(dictRoot, pos); break; case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES: offset = ByteArrayUtils::readUint16AndAdvancePosition( binaryDictionaryInfo->getDictRoot(), pos); offset = ByteArrayUtils::readUint16AndAdvancePosition(dictRoot, pos); break; case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES: offset = ByteArrayUtils::readUint24AndAdvancePosition( binaryDictionaryInfo->getDictRoot(), pos); offset = ByteArrayUtils::readUint24AndAdvancePosition(dictRoot, pos); break; } if (isOffsetNegative(flags)) { Loading
native/jni/src/suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h +6 −8 Original line number Diff line number Diff line Loading @@ -32,9 +32,8 @@ class BinaryDictionaryTerminalAttributesReadingUtils { typedef TerminalAttributeFlags ShortcutFlags; static AK_FORCE_INLINE TerminalAttributeFlags getFlagsAndForwardPointer( const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) { return ByteArrayUtils::readUint8AndAdvancePosition( binaryDictionaryInfo->getDictRoot(), pos); const uint8_t *const dictRoot, int *const pos) { return ByteArrayUtils::readUint8AndAdvancePosition(dictRoot, pos); } static AK_FORCE_INLINE int getProbabilityFromFlags(const TerminalAttributeFlags flags) { Loading @@ -47,18 +46,17 @@ class BinaryDictionaryTerminalAttributesReadingUtils { // Bigrams reading methods static AK_FORCE_INLINE void skipExistingBigrams( const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) { BigramFlags flags = getFlagsAndForwardPointer(binaryDictionaryInfo, pos); const uint8_t *const dictRoot, int *const pos) { BigramFlags flags = getFlagsAndForwardPointer(dictRoot, pos); while (hasNext(flags)) { *pos += attributeAddressSize(flags); flags = getFlagsAndForwardPointer(binaryDictionaryInfo, pos); flags = getFlagsAndForwardPointer(dictRoot, pos); } *pos += attributeAddressSize(flags); } static int getBigramAddressAndForwardPointer( const BinaryDictionaryInfo *const binaryDictionaryInfo, const BigramFlags flags, int *const pos); const uint8_t *const dictRoot, const BigramFlags flags, int *const pos); // Shortcuts reading methods // This method returns the size of the shortcut list region excluding the shortcut list size Loading