Loading native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp +4 −20 Original line number Diff line number Diff line Loading @@ -48,21 +48,10 @@ BigramDictionary::~BigramDictionary() { */ void BigramDictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo, SuggestionResults *const outSuggestionResults) const { int pos = getBigramListPositionForWord(prevWordsInfo->getPrevWordCodePoints(), prevWordsInfo->getPrevWordCodePointCount(), false /* forceLowerCaseSearch */); // getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams if (NOT_A_DICT_POS == pos) { // If no bigrams for this exact word, search again in lower case. pos = getBigramListPositionForWord(prevWordsInfo->getPrevWordCodePoints(), prevWordsInfo->getPrevWordCodePointCount(), true /* forceLowerCaseSearch */); } // If still no bigrams, we really don't have them! if (NOT_A_DICT_POS == pos) return; int unigramProbability = 0; int bigramCodePoints[MAX_WORD_LENGTH]; BinaryDictionaryBigramsIterator bigramsIt( mDictionaryStructurePolicy->getBigramsStructurePolicy(), pos); BinaryDictionaryBigramsIterator bigramsIt = prevWordsInfo->getBigramsIteratorForPrediction(mDictionaryStructurePolicy); while (bigramsIt.hasNext()) { bigramsIt.next(); if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) { Loading Loading @@ -98,16 +87,11 @@ int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const in int BigramDictionary::getBigramProbability(const PrevWordsInfo *const prevWordsInfo, const int *word1, int length1) const { int pos = getBigramListPositionForWord(prevWordsInfo->getPrevWordCodePoints(), prevWordsInfo->getPrevWordCodePointCount(), false /* forceLowerCaseSearch */); // getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams if (NOT_A_DICT_POS == pos) return NOT_A_PROBABILITY; int nextWordPos = mDictionaryStructurePolicy->getTerminalPtNodePositionOfWord(word1, length1, false /* forceLowerCaseSearch */); if (NOT_A_DICT_POS == nextWordPos) return NOT_A_PROBABILITY; BinaryDictionaryBigramsIterator bigramsIt( mDictionaryStructurePolicy->getBigramsStructurePolicy(), pos); BinaryDictionaryBigramsIterator bigramsIt = prevWordsInfo->getBigramsIteratorForPrediction(mDictionaryStructurePolicy); while (bigramsIt.hasNext()) { bigramsIt.next(); if (bigramsIt.getBigramPos() == nextWordPos Loading native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h +5 −0 Original line number Diff line number Diff line Loading @@ -30,6 +30,11 @@ class BinaryDictionaryBigramsIterator { mBigramPos(NOT_A_DICT_POS), mProbability(NOT_A_PROBABILITY), mHasNext(pos != NOT_A_DICT_POS) {} BinaryDictionaryBigramsIterator(BinaryDictionaryBigramsIterator &&bigramsIterator) : mBigramsStructurePolicy(bigramsIterator.mBigramsStructurePolicy), mPos(bigramsIterator.mPos), mBigramPos(bigramsIterator.mBigramPos), mProbability(bigramsIterator.mProbability), mHasNext(bigramsIterator.mHasNext) {} AK_FORCE_INLINE bool hasNext() const { return mHasNext; } Loading native/jni/src/suggest/core/session/dic_traverse_session.cpp +2 −15 Original line number Diff line number Diff line Loading @@ -35,21 +35,8 @@ void DicTraverseSession::init(const Dictionary *const dictionary, mMultiWordCostMultiplier = getDictionaryStructurePolicy()->getHeaderStructurePolicy() ->getMultiWordCostMultiplier(); mSuggestOptions = suggestOptions; if (!prevWordsInfo->getPrevWordCodePoints()) { mPrevWordsPtNodePos[0] = NOT_A_DICT_POS; return; } // TODO: merge following similar calls to getTerminalPosition into one case-insensitive call. mPrevWordsPtNodePos[0] = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord( prevWordsInfo->getPrevWordCodePoints(), prevWordsInfo->getPrevWordCodePointCount(), false /* forceLowerCaseSearch */); if (mPrevWordsPtNodePos[0] == NOT_A_DICT_POS) { // Check bigrams for lower-cased previous word if original was not found. Useful for // auto-capitalized words like "The [current_word]". mPrevWordsPtNodePos[0] = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord( prevWordsInfo->getPrevWordCodePoints(), prevWordsInfo->getPrevWordCodePointCount(), true /* forceLowerCaseSearch */); } prevWordsInfo->getPrevWordsTerminalPtNodePos( getDictionaryStructurePolicy(), mPrevWordsPtNodePos); } void DicTraverseSession::setupForGetSuggestions(const ProximityInfo *pInfo, Loading native/jni/src/suggest/core/session/prev_words_info.h +53 −4 Original line number Diff line number Diff line Loading @@ -18,6 +18,8 @@ #define LATINIME_PREV_WORDS_INFO_H #include "defines.h" #include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" namespace latinime { Loading @@ -38,17 +40,64 @@ class PrevWordsInfo { mPrevWordCodePointCount[0] = prevWordCodePointCount; mIsBeginningOfSentence[0] = isBeginningOfSentence; } const int *getPrevWordCodePoints() const { return mPrevWordCodePoints[0]; void getPrevWordsTerminalPtNodePos( const DictionaryStructureWithBufferPolicy *const dictStructurePolicy, int *const outPrevWordsTerminalPtNodePos) const { for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) { outPrevWordsTerminalPtNodePos[i] = getTerminalPtNodePosOfWord(dictStructurePolicy, mPrevWordCodePoints[i], mPrevWordCodePointCount[i], mIsBeginningOfSentence[i]); } } int getPrevWordCodePointCount() const { return mPrevWordCodePointCount[0]; BinaryDictionaryBigramsIterator getBigramsIteratorForPrediction( const DictionaryStructureWithBufferPolicy *const dictStructurePolicy) const { int pos = getBigramListPositionForWord(dictStructurePolicy, mPrevWordCodePoints[0], mPrevWordCodePointCount[0], false /* forceLowerCaseSearch */); // getBigramListPositionForWord returns NOT_A_DICT_POS if this word isn't in the // dictionary or has no bigrams if (NOT_A_DICT_POS == pos) { // If no bigrams for this exact word, search again in lower case. pos = getBigramListPositionForWord(dictStructurePolicy, mPrevWordCodePoints[0], mPrevWordCodePointCount[0], true /* forceLowerCaseSearch */); } return BinaryDictionaryBigramsIterator( dictStructurePolicy->getBigramsStructurePolicy(), pos); } private: DISALLOW_COPY_AND_ASSIGN(PrevWordsInfo); static int getTerminalPtNodePosOfWord( const DictionaryStructureWithBufferPolicy *const dictStructurePolicy, const int *const wordCodePoints, const int wordCodePointCount, const bool isBeginningOfSentence) { if (!dictStructurePolicy || !wordCodePoints) { return NOT_A_DICT_POS; } const int wordPtNodePos = dictStructurePolicy->getTerminalPtNodePositionOfWord( wordCodePoints, wordCodePointCount, false /* forceLowerCaseSearch */); if (wordPtNodePos != NOT_A_DICT_POS) { return wordPtNodePos; } // Check bigrams for lower-cased previous word if original was not found. Useful for // auto-capitalized words like "The [current_word]". return dictStructurePolicy->getTerminalPtNodePositionOfWord( wordCodePoints, wordCodePointCount, true /* forceLowerCaseSearch */); } static int getBigramListPositionForWord( const DictionaryStructureWithBufferPolicy *const dictStructurePolicy, const int *wordCodePoints, const int wordCodePointCount, const bool forceLowerCaseSearch) { if (!wordCodePoints || wordCodePointCount <= 0) return NOT_A_DICT_POS; const int terminalPtNodePos = dictStructurePolicy->getTerminalPtNodePositionOfWord( wordCodePoints, wordCodePointCount, forceLowerCaseSearch); if (NOT_A_DICT_POS == terminalPtNodePos) return NOT_A_DICT_POS; return dictStructurePolicy->getBigramsPositionOfPtNode(terminalPtNodePos); } void clear() { for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) { mPrevWordCodePoints[i] = nullptr; Loading Loading
native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp +4 −20 Original line number Diff line number Diff line Loading @@ -48,21 +48,10 @@ BigramDictionary::~BigramDictionary() { */ void BigramDictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo, SuggestionResults *const outSuggestionResults) const { int pos = getBigramListPositionForWord(prevWordsInfo->getPrevWordCodePoints(), prevWordsInfo->getPrevWordCodePointCount(), false /* forceLowerCaseSearch */); // getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams if (NOT_A_DICT_POS == pos) { // If no bigrams for this exact word, search again in lower case. pos = getBigramListPositionForWord(prevWordsInfo->getPrevWordCodePoints(), prevWordsInfo->getPrevWordCodePointCount(), true /* forceLowerCaseSearch */); } // If still no bigrams, we really don't have them! if (NOT_A_DICT_POS == pos) return; int unigramProbability = 0; int bigramCodePoints[MAX_WORD_LENGTH]; BinaryDictionaryBigramsIterator bigramsIt( mDictionaryStructurePolicy->getBigramsStructurePolicy(), pos); BinaryDictionaryBigramsIterator bigramsIt = prevWordsInfo->getBigramsIteratorForPrediction(mDictionaryStructurePolicy); while (bigramsIt.hasNext()) { bigramsIt.next(); if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) { Loading Loading @@ -98,16 +87,11 @@ int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const in int BigramDictionary::getBigramProbability(const PrevWordsInfo *const prevWordsInfo, const int *word1, int length1) const { int pos = getBigramListPositionForWord(prevWordsInfo->getPrevWordCodePoints(), prevWordsInfo->getPrevWordCodePointCount(), false /* forceLowerCaseSearch */); // getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams if (NOT_A_DICT_POS == pos) return NOT_A_PROBABILITY; int nextWordPos = mDictionaryStructurePolicy->getTerminalPtNodePositionOfWord(word1, length1, false /* forceLowerCaseSearch */); if (NOT_A_DICT_POS == nextWordPos) return NOT_A_PROBABILITY; BinaryDictionaryBigramsIterator bigramsIt( mDictionaryStructurePolicy->getBigramsStructurePolicy(), pos); BinaryDictionaryBigramsIterator bigramsIt = prevWordsInfo->getBigramsIteratorForPrediction(mDictionaryStructurePolicy); while (bigramsIt.hasNext()) { bigramsIt.next(); if (bigramsIt.getBigramPos() == nextWordPos Loading
native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h +5 −0 Original line number Diff line number Diff line Loading @@ -30,6 +30,11 @@ class BinaryDictionaryBigramsIterator { mBigramPos(NOT_A_DICT_POS), mProbability(NOT_A_PROBABILITY), mHasNext(pos != NOT_A_DICT_POS) {} BinaryDictionaryBigramsIterator(BinaryDictionaryBigramsIterator &&bigramsIterator) : mBigramsStructurePolicy(bigramsIterator.mBigramsStructurePolicy), mPos(bigramsIterator.mPos), mBigramPos(bigramsIterator.mBigramPos), mProbability(bigramsIterator.mProbability), mHasNext(bigramsIterator.mHasNext) {} AK_FORCE_INLINE bool hasNext() const { return mHasNext; } Loading
native/jni/src/suggest/core/session/dic_traverse_session.cpp +2 −15 Original line number Diff line number Diff line Loading @@ -35,21 +35,8 @@ void DicTraverseSession::init(const Dictionary *const dictionary, mMultiWordCostMultiplier = getDictionaryStructurePolicy()->getHeaderStructurePolicy() ->getMultiWordCostMultiplier(); mSuggestOptions = suggestOptions; if (!prevWordsInfo->getPrevWordCodePoints()) { mPrevWordsPtNodePos[0] = NOT_A_DICT_POS; return; } // TODO: merge following similar calls to getTerminalPosition into one case-insensitive call. mPrevWordsPtNodePos[0] = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord( prevWordsInfo->getPrevWordCodePoints(), prevWordsInfo->getPrevWordCodePointCount(), false /* forceLowerCaseSearch */); if (mPrevWordsPtNodePos[0] == NOT_A_DICT_POS) { // Check bigrams for lower-cased previous word if original was not found. Useful for // auto-capitalized words like "The [current_word]". mPrevWordsPtNodePos[0] = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord( prevWordsInfo->getPrevWordCodePoints(), prevWordsInfo->getPrevWordCodePointCount(), true /* forceLowerCaseSearch */); } prevWordsInfo->getPrevWordsTerminalPtNodePos( getDictionaryStructurePolicy(), mPrevWordsPtNodePos); } void DicTraverseSession::setupForGetSuggestions(const ProximityInfo *pInfo, Loading
native/jni/src/suggest/core/session/prev_words_info.h +53 −4 Original line number Diff line number Diff line Loading @@ -18,6 +18,8 @@ #define LATINIME_PREV_WORDS_INFO_H #include "defines.h" #include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" namespace latinime { Loading @@ -38,17 +40,64 @@ class PrevWordsInfo { mPrevWordCodePointCount[0] = prevWordCodePointCount; mIsBeginningOfSentence[0] = isBeginningOfSentence; } const int *getPrevWordCodePoints() const { return mPrevWordCodePoints[0]; void getPrevWordsTerminalPtNodePos( const DictionaryStructureWithBufferPolicy *const dictStructurePolicy, int *const outPrevWordsTerminalPtNodePos) const { for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) { outPrevWordsTerminalPtNodePos[i] = getTerminalPtNodePosOfWord(dictStructurePolicy, mPrevWordCodePoints[i], mPrevWordCodePointCount[i], mIsBeginningOfSentence[i]); } } int getPrevWordCodePointCount() const { return mPrevWordCodePointCount[0]; BinaryDictionaryBigramsIterator getBigramsIteratorForPrediction( const DictionaryStructureWithBufferPolicy *const dictStructurePolicy) const { int pos = getBigramListPositionForWord(dictStructurePolicy, mPrevWordCodePoints[0], mPrevWordCodePointCount[0], false /* forceLowerCaseSearch */); // getBigramListPositionForWord returns NOT_A_DICT_POS if this word isn't in the // dictionary or has no bigrams if (NOT_A_DICT_POS == pos) { // If no bigrams for this exact word, search again in lower case. pos = getBigramListPositionForWord(dictStructurePolicy, mPrevWordCodePoints[0], mPrevWordCodePointCount[0], true /* forceLowerCaseSearch */); } return BinaryDictionaryBigramsIterator( dictStructurePolicy->getBigramsStructurePolicy(), pos); } private: DISALLOW_COPY_AND_ASSIGN(PrevWordsInfo); static int getTerminalPtNodePosOfWord( const DictionaryStructureWithBufferPolicy *const dictStructurePolicy, const int *const wordCodePoints, const int wordCodePointCount, const bool isBeginningOfSentence) { if (!dictStructurePolicy || !wordCodePoints) { return NOT_A_DICT_POS; } const int wordPtNodePos = dictStructurePolicy->getTerminalPtNodePositionOfWord( wordCodePoints, wordCodePointCount, false /* forceLowerCaseSearch */); if (wordPtNodePos != NOT_A_DICT_POS) { return wordPtNodePos; } // Check bigrams for lower-cased previous word if original was not found. Useful for // auto-capitalized words like "The [current_word]". return dictStructurePolicy->getTerminalPtNodePositionOfWord( wordCodePoints, wordCodePointCount, true /* forceLowerCaseSearch */); } static int getBigramListPositionForWord( const DictionaryStructureWithBufferPolicy *const dictStructurePolicy, const int *wordCodePoints, const int wordCodePointCount, const bool forceLowerCaseSearch) { if (!wordCodePoints || wordCodePointCount <= 0) return NOT_A_DICT_POS; const int terminalPtNodePos = dictStructurePolicy->getTerminalPtNodePositionOfWord( wordCodePoints, wordCodePointCount, forceLowerCaseSearch); if (NOT_A_DICT_POS == terminalPtNodePos) return NOT_A_DICT_POS; return dictStructurePolicy->getBigramsPositionOfPtNode(terminalPtNodePos); } void clear() { for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) { mPrevWordCodePoints[i] = nullptr; Loading