Loading native/jni/src/bigram_dictionary.cpp +1 −0 Original line number Diff line number Diff line Loading @@ -123,6 +123,7 @@ int BigramDictionary::getBigrams(unsigned short *prevWord, int prevWordLength, i } pos = BinaryFormat::skipChildrenPosition(flags, pos); pos = BinaryFormat::skipFrequency(flags, pos); pos = BinaryFormat::skipShortcuts(root, flags, pos); int bigramFlags; int bigramCount = 0; do { Loading native/jni/src/binary_format.h +31 −9 Original line number Diff line number Diff line Loading @@ -40,6 +40,9 @@ class BinaryFormat { // implementations. On this occasion, we made the magic number 32 bits long. const static uint32_t FORMAT_VERSION_2_MAGIC_NUMBER = 0x9BC13AFE; const static int CHARACTER_ARRAY_TERMINATOR_SIZE = 1; const static int SHORTCUT_LIST_SIZE_SIZE = 2; static int detectFormat(const uint8_t* const dict); static unsigned int getHeaderSize(const uint8_t* const dict); static int getGroupCountAndForwardPointer(const uint8_t* const dict, int* pos); Loading @@ -47,9 +50,10 @@ class BinaryFormat { static int32_t getCharCodeAndForwardPointer(const uint8_t* const dict, int* pos); static int readFrequencyWithoutMovingPointer(const uint8_t* const dict, const int pos); static int skipOtherCharacters(const uint8_t* const dict, const int pos); static int skipAttributes(const uint8_t* const dict, const int pos); static int skipChildrenPosition(const uint8_t flags, const int pos); static int skipFrequency(const uint8_t flags, const int pos); static int skipShortcuts(const uint8_t* const dict, const uint8_t flags, const int pos); static int skipBigrams(const uint8_t* const dict, const uint8_t flags, const int pos); static int skipAllAttributes(const uint8_t* const dict, const uint8_t flags, const int pos); static int skipChildrenPosAndAttributes(const uint8_t* const dict, const uint8_t flags, const int pos); Loading Loading @@ -157,12 +161,12 @@ static inline int attributeAddressSize(const uint8_t flags) { */ } inline int 
BinaryFormat::skipAttributes(const uint8_t* const dict, const int pos) { static inline int skipExistingBigrams(const uint8_t* const dict, const int pos) { int currentPos = pos; uint8_t flags = getFlagsAndForwardPointer(dict, &currentPos); uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(dict, &currentPos); while (flags & UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT) { currentPos += attributeAddressSize(flags); flags = getFlagsAndForwardPointer(dict, &currentPos); flags = BinaryFormat::getFlagsAndForwardPointer(dict, &currentPos); } currentPos += attributeAddressSize(flags); return currentPos; Loading @@ -174,6 +178,10 @@ static inline int childrenAddressSize(const uint8_t flags) { /* See the note in attributeAddressSize. The same applies here */ } static inline int shortcutByteSize(const uint8_t* const dict, const int pos) { return ((int)(dict[pos] << 8)) + (dict[pos + 1]); } inline int BinaryFormat::skipChildrenPosition(const uint8_t flags, const int pos) { return pos + childrenAddressSize(flags); } Loading @@ -182,16 +190,30 @@ inline int BinaryFormat::skipFrequency(const uint8_t flags, const int pos) { return UnigramDictionary::FLAG_IS_TERMINAL & flags ? pos + 1 : pos; } inline int BinaryFormat::skipAllAttributes(const uint8_t* const dict, const uint8_t flags, inline int BinaryFormat::skipShortcuts(const uint8_t* const dict, const uint8_t flags, const int pos) { // This function skips all attributes: shortcuts and bigrams. 
int newPos = pos; if (UnigramDictionary::FLAG_HAS_SHORTCUT_TARGETS & flags) { newPos = skipAttributes(dict, newPos); return pos + shortcutByteSize(dict, pos); } else { return pos; } } inline int BinaryFormat::skipBigrams(const uint8_t* const dict, const uint8_t flags, const int pos) { if (UnigramDictionary::FLAG_HAS_BIGRAMS & flags) { newPos = skipAttributes(dict, newPos); return skipExistingBigrams(dict, pos); } else { return pos; } } inline int BinaryFormat::skipAllAttributes(const uint8_t* const dict, const uint8_t flags, const int pos) { // This function skips all attributes: shortcuts and bigrams. int newPos = pos; newPos = skipShortcuts(dict, flags, newPos); newPos = skipBigrams(dict, flags, newPos); return newPos; } Loading native/jni/src/terminal_attributes.h +12 −8 Original line number Diff line number Diff line Loading @@ -45,13 +45,19 @@ class TerminalAttributes { // Gets the shortcut target itself as a uint16_t string. For parameters and return value // see BinaryFormat::getWordAtAddress. // TODO: make the output an uint32_t* to handle the whole unicode range. 
inline int getNextShortcutTarget(const int maxDepth, uint16_t* outWord) { const int shortcutFlags = BinaryFormat::getFlagsAndForwardPointer(mDict, &mPos); mHasNextShortcutTarget = 0 != (shortcutFlags & UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT); int shortcutAddress = BinaryFormat::getAttributeAddressAndForwardPointer(mDict, shortcutFlags, &mPos); return BinaryFormat::getWordAtAddress(mDict, shortcutAddress, maxDepth, outWord); unsigned int i; for (i = 0; i < MAX_WORD_LENGTH_INTERNAL; ++i) { const int charCode = BinaryFormat::getCharCodeAndForwardPointer(mDict, &mPos); if (NOT_A_CHARACTER == charCode) break; outWord[i] = (uint16_t)charCode; } mPos += BinaryFormat::CHARACTER_ARRAY_TERMINATOR_SIZE; return i; } }; Loading @@ -65,12 +71,10 @@ class TerminalAttributes { mDict(dict), mFlags(flags), mStartPos(pos) { } inline bool isShortcutOnly() const { return 0 != (mFlags & UnigramDictionary::FLAG_IS_SHORTCUT_ONLY); } inline ShortcutIterator getShortcutIterator() const { return ShortcutIterator(mDict, mStartPos, mFlags); // The size of the shortcuts is stored here so that the whole shortcut chunk can be // skipped quickly, so we ignore it. return ShortcutIterator(mDict, mStartPos + BinaryFormat::SHORTCUT_LIST_SIZE_SIZE, mFlags); } }; } // namespace latinime Loading native/jni/src/unigram_dictionary.cpp +5 −5 Original line number Diff line number Diff line Loading @@ -366,10 +366,9 @@ inline void UnigramDictionary::onTerminal(const int freq, WordsPriorityQueue *masterQueue = queuePool->getMasterQueue(); const int finalFreq = correction->getFinalFreq(freq, &wordPointer, &wordLength); if (finalFreq != NOT_A_FREQUENCY) { if (!terminalAttributes.isShortcutOnly()) { addWord(wordPointer, wordLength, finalFreq, masterQueue); } const int shortcutFreq = finalFreq > 0 ? finalFreq - 1 : 0; // Please note that the shortcut candidates will be added to the master queue only. 
TerminalAttributes::ShortcutIterator iterator = terminalAttributes.getShortcutIterator(); Loading @@ -379,11 +378,12 @@ inline void UnigramDictionary::onTerminal(const int freq, // We need to either modulate the frequency of each shortcut according // to its own shortcut frequency or to make the queue // so that the insert order is protected inside the queue for words // with the same score. // with the same score. For the moment we use -1 to make sure the shortcut will // never be in front of the word. uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL]; const int shortcutTargetStringLength = iterator.getNextShortcutTarget( MAX_WORD_LENGTH_INTERNAL, shortcutTarget); addWord(shortcutTarget, shortcutTargetStringLength, finalFreq, masterQueue); addWord(shortcutTarget, shortcutTargetStringLength, shortcutFreq, masterQueue); } } } Loading native/jni/src/unigram_dictionary.h +0 −4 Original line number Diff line number Diff line Loading @@ -49,10 +49,6 @@ class UnigramDictionary { static const int FLAG_HAS_SHORTCUT_TARGETS = 0x08; // Flag for bigram presence static const int FLAG_HAS_BIGRAMS = 0x04; // Flag for shortcut-only words. Some words are shortcut-only, which means they match when // the user types them but they don't pop in the suggestion strip, only the words they are // shortcuts for do. static const int FLAG_IS_SHORTCUT_ONLY = 0x02; // Attribute (bigram/shortcut) related flags: // Flag for presence of more attributes Loading Loading
native/jni/src/bigram_dictionary.cpp +1 −0 Original line number Diff line number Diff line Loading @@ -123,6 +123,7 @@ int BigramDictionary::getBigrams(unsigned short *prevWord, int prevWordLength, i } pos = BinaryFormat::skipChildrenPosition(flags, pos); pos = BinaryFormat::skipFrequency(flags, pos); pos = BinaryFormat::skipShortcuts(root, flags, pos); int bigramFlags; int bigramCount = 0; do { Loading
native/jni/src/binary_format.h +31 −9 Original line number Diff line number Diff line Loading @@ -40,6 +40,9 @@ class BinaryFormat { // implementations. On this occasion, we made the magic number 32 bits long. const static uint32_t FORMAT_VERSION_2_MAGIC_NUMBER = 0x9BC13AFE; const static int CHARACTER_ARRAY_TERMINATOR_SIZE = 1; const static int SHORTCUT_LIST_SIZE_SIZE = 2; static int detectFormat(const uint8_t* const dict); static unsigned int getHeaderSize(const uint8_t* const dict); static int getGroupCountAndForwardPointer(const uint8_t* const dict, int* pos); Loading @@ -47,9 +50,10 @@ class BinaryFormat { static int32_t getCharCodeAndForwardPointer(const uint8_t* const dict, int* pos); static int readFrequencyWithoutMovingPointer(const uint8_t* const dict, const int pos); static int skipOtherCharacters(const uint8_t* const dict, const int pos); static int skipAttributes(const uint8_t* const dict, const int pos); static int skipChildrenPosition(const uint8_t flags, const int pos); static int skipFrequency(const uint8_t flags, const int pos); static int skipShortcuts(const uint8_t* const dict, const uint8_t flags, const int pos); static int skipBigrams(const uint8_t* const dict, const uint8_t flags, const int pos); static int skipAllAttributes(const uint8_t* const dict, const uint8_t flags, const int pos); static int skipChildrenPosAndAttributes(const uint8_t* const dict, const uint8_t flags, const int pos); Loading Loading @@ -157,12 +161,12 @@ static inline int attributeAddressSize(const uint8_t flags) { */ } inline int BinaryFormat::skipAttributes(const uint8_t* const dict, const int pos) { static inline int skipExistingBigrams(const uint8_t* const dict, const int pos) { int currentPos = pos; uint8_t flags = getFlagsAndForwardPointer(dict, &currentPos); uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(dict, &currentPos); while (flags & UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT) { currentPos += attributeAddressSize(flags); flags = getFlagsAndForwardPointer(dict, 
&currentPos); flags = BinaryFormat::getFlagsAndForwardPointer(dict, &currentPos); } currentPos += attributeAddressSize(flags); return currentPos; Loading @@ -174,6 +178,10 @@ static inline int childrenAddressSize(const uint8_t flags) { /* See the note in attributeAddressSize. The same applies here */ } static inline int shortcutByteSize(const uint8_t* const dict, const int pos) { return ((int)(dict[pos] << 8)) + (dict[pos + 1]); } inline int BinaryFormat::skipChildrenPosition(const uint8_t flags, const int pos) { return pos + childrenAddressSize(flags); } Loading @@ -182,16 +190,30 @@ inline int BinaryFormat::skipFrequency(const uint8_t flags, const int pos) { return UnigramDictionary::FLAG_IS_TERMINAL & flags ? pos + 1 : pos; } inline int BinaryFormat::skipAllAttributes(const uint8_t* const dict, const uint8_t flags, inline int BinaryFormat::skipShortcuts(const uint8_t* const dict, const uint8_t flags, const int pos) { // This function skips all attributes: shortcuts and bigrams. int newPos = pos; if (UnigramDictionary::FLAG_HAS_SHORTCUT_TARGETS & flags) { newPos = skipAttributes(dict, newPos); return pos + shortcutByteSize(dict, pos); } else { return pos; } } inline int BinaryFormat::skipBigrams(const uint8_t* const dict, const uint8_t flags, const int pos) { if (UnigramDictionary::FLAG_HAS_BIGRAMS & flags) { newPos = skipAttributes(dict, newPos); return skipExistingBigrams(dict, pos); } else { return pos; } } inline int BinaryFormat::skipAllAttributes(const uint8_t* const dict, const uint8_t flags, const int pos) { // This function skips all attributes: shortcuts and bigrams. int newPos = pos; newPos = skipShortcuts(dict, flags, newPos); newPos = skipBigrams(dict, flags, newPos); return newPos; } Loading
native/jni/src/terminal_attributes.h +12 −8 Original line number Diff line number Diff line Loading @@ -45,13 +45,19 @@ class TerminalAttributes { // Gets the shortcut target itself as a uint16_t string. For parameters and return value // see BinaryFormat::getWordAtAddress. // TODO: make the output an uint32_t* to handle the whole unicode range. inline int getNextShortcutTarget(const int maxDepth, uint16_t* outWord) { const int shortcutFlags = BinaryFormat::getFlagsAndForwardPointer(mDict, &mPos); mHasNextShortcutTarget = 0 != (shortcutFlags & UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT); int shortcutAddress = BinaryFormat::getAttributeAddressAndForwardPointer(mDict, shortcutFlags, &mPos); return BinaryFormat::getWordAtAddress(mDict, shortcutAddress, maxDepth, outWord); unsigned int i; for (i = 0; i < MAX_WORD_LENGTH_INTERNAL; ++i) { const int charCode = BinaryFormat::getCharCodeAndForwardPointer(mDict, &mPos); if (NOT_A_CHARACTER == charCode) break; outWord[i] = (uint16_t)charCode; } mPos += BinaryFormat::CHARACTER_ARRAY_TERMINATOR_SIZE; return i; } }; Loading @@ -65,12 +71,10 @@ class TerminalAttributes { mDict(dict), mFlags(flags), mStartPos(pos) { } inline bool isShortcutOnly() const { return 0 != (mFlags & UnigramDictionary::FLAG_IS_SHORTCUT_ONLY); } inline ShortcutIterator getShortcutIterator() const { return ShortcutIterator(mDict, mStartPos, mFlags); // The size of the shortcuts is stored here so that the whole shortcut chunk can be // skipped quickly, so we ignore it. return ShortcutIterator(mDict, mStartPos + BinaryFormat::SHORTCUT_LIST_SIZE_SIZE, mFlags); } }; } // namespace latinime Loading
native/jni/src/unigram_dictionary.cpp +5 −5 Original line number Diff line number Diff line Loading @@ -366,10 +366,9 @@ inline void UnigramDictionary::onTerminal(const int freq, WordsPriorityQueue *masterQueue = queuePool->getMasterQueue(); const int finalFreq = correction->getFinalFreq(freq, &wordPointer, &wordLength); if (finalFreq != NOT_A_FREQUENCY) { if (!terminalAttributes.isShortcutOnly()) { addWord(wordPointer, wordLength, finalFreq, masterQueue); } const int shortcutFreq = finalFreq > 0 ? finalFreq - 1 : 0; // Please note that the shortcut candidates will be added to the master queue only. TerminalAttributes::ShortcutIterator iterator = terminalAttributes.getShortcutIterator(); Loading @@ -379,11 +378,12 @@ inline void UnigramDictionary::onTerminal(const int freq, // We need to either modulate the frequency of each shortcut according // to its own shortcut frequency or to make the queue // so that the insert order is protected inside the queue for words // with the same score. // with the same score. For the moment we use -1 to make sure the shortcut will // never be in front of the word. uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL]; const int shortcutTargetStringLength = iterator.getNextShortcutTarget( MAX_WORD_LENGTH_INTERNAL, shortcutTarget); addWord(shortcutTarget, shortcutTargetStringLength, finalFreq, masterQueue); addWord(shortcutTarget, shortcutTargetStringLength, shortcutFreq, masterQueue); } } } Loading
native/jni/src/unigram_dictionary.h +0 −4 Original line number Diff line number Diff line Loading @@ -49,10 +49,6 @@ class UnigramDictionary { static const int FLAG_HAS_SHORTCUT_TARGETS = 0x08; // Flag for bigram presence static const int FLAG_HAS_BIGRAMS = 0x04; // Flag for shortcut-only words. Some words are shortcut-only, which means they match when // the user types them but they don't pop in the suggestion strip, only the words they are // shortcuts for do. static const int FLAG_IS_SHORTCUT_ONLY = 0x02; // Attribute (bigram/shortcut) related flags: // Flag for presence of more attributes Loading