Loading native/jni/src/suggest/core/dictionary/property/unigram_property.h +5 −0 Original line number Diff line number Diff line Loading @@ -71,6 +71,11 @@ class UnigramProperty { return mIsBlacklisted; } bool isPossiblyOffensive() const { // TODO: Have dedicated flag. return mProbability == 0; } bool hasShortcuts() const { return !mShortcuts.empty(); } Loading native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp +12 −5 Original line number Diff line number Diff line Loading @@ -38,7 +38,7 @@ bool LanguageModelDictContent::runGC( 0 /* nextLevelBitmapEntryIndex */, outNgramCount); } int LanguageModelDictContent::getWordProbability(const WordIdArrayView prevWordIds, const WordAttributes LanguageModelDictContent::getWordAttributes(const WordIdArrayView prevWordIds, const int wordId, const HeaderPolicy *const headerPolicy) const { int bitmapEntryIndices[MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1]; bitmapEntryIndices[0] = mTrieMap.getRootBitmapEntryIndex(); Loading @@ -60,17 +60,24 @@ int LanguageModelDictContent::getWordProbability(const WordIdArrayView prevWordI } const ProbabilityEntry probabilityEntry = ProbabilityEntry::decode(result.mValue, mHasHistoricalInfo); int probability = NOT_A_PROBABILITY; if (mHasHistoricalInfo) { const int probability = ForgettingCurveUtils::decodeProbability( const int rawProbability = ForgettingCurveUtils::decodeProbability( probabilityEntry.getHistoricalInfo(), headerPolicy) + ForgettingCurveUtils::getProbabilityBiasForNgram(i + 1 /* n */); return std::min(probability, MAX_PROBABILITY); probability = std::min(rawProbability, MAX_PROBABILITY); } else { return probabilityEntry.getProbability(); probability = probabilityEntry.getProbability(); } // TODO: Some flags in unigramProbabilityEntry should be overwritten by flags in // probabilityEntry. const ProbabilityEntry unigramProbabilityEntry = getProbabilityEntry(wordId); return WordAttributes(probability, unigramProbabilityEntry.isNotAWord(), unigramProbabilityEntry.isBlacklisted(), unigramProbabilityEntry.isPossiblyOffensive()); } // Cannot find the word. return NOT_A_PROBABILITY; return WordAttributes(); } ProbabilityEntry LanguageModelDictContent::getNgramProbabilityEntry( Loading native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h +2 −1 Original line number Diff line number Diff line Loading @@ -21,6 +21,7 @@ #include <vector> #include "defines.h" #include "suggest/core/dictionary/word_attributes.h" #include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h" #include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" Loading Loading @@ -128,7 +129,7 @@ class LanguageModelDictContent { const LanguageModelDictContent *const originalContent, int *const outNgramCount); int getWordProbability(const WordIdArrayView prevWordIds, const int wordId, const WordAttributes getWordAttributes(const WordIdArrayView prevWordIds, const int wordId, const HeaderPolicy *const headerPolicy) const; ProbabilityEntry getProbabilityEntry(const int wordId) const { Loading native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h +27 −3 Original line number Diff line number Diff line Loading @@ -49,7 +49,9 @@ class ProbabilityEntry { // Create from unigram property. ProbabilityEntry(const UnigramProperty *const unigramProperty) : mFlags(createFlags(unigramProperty->representsBeginningOfSentence())), : mFlags(createFlags(unigramProperty->representsBeginningOfSentence(), unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(), unigramProperty->isPossiblyOffensive())), mProbability(unigramProperty->getProbability()), mHistoricalInfo(unigramProperty->getTimestamp(), unigramProperty->getLevel(), unigramProperty->getCount()) {} Loading Loading @@ -85,6 +87,18 @@ class ProbabilityEntry { return (mFlags & Ver4DictConstants::FLAG_REPRESENTS_BEGINNING_OF_SENTENCE) != 0; } bool isNotAWord() const { return (mFlags & Ver4DictConstants::FLAG_NOT_A_WORD) != 0; } bool isBlacklisted() const { return (mFlags & Ver4DictConstants::FLAG_BLACKLISTED) != 0; } bool isPossiblyOffensive() const { return (mFlags & Ver4DictConstants::FLAG_POSSIBLY_OFFENSIVE) != 0; } uint64_t encode(const bool hasHistoricalInfo) const { uint64_t encodedEntry = static_cast<uint64_t>(mFlags); if (hasHistoricalInfo) { Loading Loading @@ -142,10 +156,20 @@ class ProbabilityEntry { (encodedEntry >> (pos * CHAR_BIT)) & ((1ull << (size * CHAR_BIT)) - 1)); } static uint8_t createFlags(const bool representsBeginningOfSentence) { static uint8_t createFlags(const bool representsBeginningOfSentence, const bool isNotAWord, const bool isBlacklisted, const bool isPossiblyOffensive) { uint8_t flags = 0; if (representsBeginningOfSentence) { flags ^= Ver4DictConstants::FLAG_REPRESENTS_BEGINNING_OF_SENTENCE; flags |= Ver4DictConstants::FLAG_REPRESENTS_BEGINNING_OF_SENTENCE; } if (isNotAWord) { flags |= Ver4DictConstants::FLAG_NOT_A_WORD; } if (isBlacklisted) { flags |= Ver4DictConstants::FLAG_BLACKLISTED; } if (isPossiblyOffensive) { flags |= Ver4DictConstants::FLAG_POSSIBLY_OFFENSIVE; } return flags; } Loading native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp +3 −0 Original line number Diff line number Diff line Loading @@ -54,6 +54,9 @@ const int Ver4DictConstants::WORD_COUNT_FIELD_SIZE = 1; const uint8_t Ver4DictConstants::FLAG_REPRESENTS_BEGINNING_OF_SENTENCE = 0x1; const uint8_t Ver4DictConstants::FLAG_NOT_A_VALID_ENTRY = 0x2; const uint8_t Ver4DictConstants::FLAG_NOT_A_WORD = 0x4; const uint8_t Ver4DictConstants::FLAG_BLACKLISTED = 0x8; const uint8_t Ver4DictConstants::FLAG_POSSIBLY_OFFENSIVE = 0x10; const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE = 64; const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE = 4; Loading Loading
native/jni/src/suggest/core/dictionary/property/unigram_property.h +5 −0 Original line number Diff line number Diff line Loading @@ -71,6 +71,11 @@ class UnigramProperty { return mIsBlacklisted; } bool isPossiblyOffensive() const { // TODO: Have dedicated flag. return mProbability == 0; } bool hasShortcuts() const { return !mShortcuts.empty(); } Loading
native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.cpp +12 −5 Original line number Diff line number Diff line Loading @@ -38,7 +38,7 @@ bool LanguageModelDictContent::runGC( 0 /* nextLevelBitmapEntryIndex */, outNgramCount); } int LanguageModelDictContent::getWordProbability(const WordIdArrayView prevWordIds, const WordAttributes LanguageModelDictContent::getWordAttributes(const WordIdArrayView prevWordIds, const int wordId, const HeaderPolicy *const headerPolicy) const { int bitmapEntryIndices[MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1]; bitmapEntryIndices[0] = mTrieMap.getRootBitmapEntryIndex(); Loading @@ -60,17 +60,24 @@ int LanguageModelDictContent::getWordProbability(const WordIdArrayView prevWordI } const ProbabilityEntry probabilityEntry = ProbabilityEntry::decode(result.mValue, mHasHistoricalInfo); int probability = NOT_A_PROBABILITY; if (mHasHistoricalInfo) { const int probability = ForgettingCurveUtils::decodeProbability( const int rawProbability = ForgettingCurveUtils::decodeProbability( probabilityEntry.getHistoricalInfo(), headerPolicy) + ForgettingCurveUtils::getProbabilityBiasForNgram(i + 1 /* n */); return std::min(probability, MAX_PROBABILITY); probability = std::min(rawProbability, MAX_PROBABILITY); } else { return probabilityEntry.getProbability(); probability = probabilityEntry.getProbability(); } // TODO: Some flags in unigramProbabilityEntry should be overwritten by flags in // probabilityEntry. const ProbabilityEntry unigramProbabilityEntry = getProbabilityEntry(wordId); return WordAttributes(probability, unigramProbabilityEntry.isNotAWord(), unigramProbabilityEntry.isBlacklisted(), unigramProbabilityEntry.isPossiblyOffensive()); } // Cannot find the word. return NOT_A_PROBABILITY; return WordAttributes(); } ProbabilityEntry LanguageModelDictContent::getNgramProbabilityEntry( Loading
native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h +2 −1 Original line number Diff line number Diff line Loading @@ -21,6 +21,7 @@ #include <vector> #include "defines.h" #include "suggest/core/dictionary/word_attributes.h" #include "suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h" #include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" Loading Loading @@ -128,7 +129,7 @@ class LanguageModelDictContent { const LanguageModelDictContent *const originalContent, int *const outNgramCount); int getWordProbability(const WordIdArrayView prevWordIds, const int wordId, const WordAttributes getWordAttributes(const WordIdArrayView prevWordIds, const int wordId, const HeaderPolicy *const headerPolicy) const; ProbabilityEntry getProbabilityEntry(const int wordId) const { Loading
native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_entry.h +27 −3 Original line number Diff line number Diff line Loading @@ -49,7 +49,9 @@ class ProbabilityEntry { // Create from unigram property. ProbabilityEntry(const UnigramProperty *const unigramProperty) : mFlags(createFlags(unigramProperty->representsBeginningOfSentence())), : mFlags(createFlags(unigramProperty->representsBeginningOfSentence(), unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(), unigramProperty->isPossiblyOffensive())), mProbability(unigramProperty->getProbability()), mHistoricalInfo(unigramProperty->getTimestamp(), unigramProperty->getLevel(), unigramProperty->getCount()) {} Loading Loading @@ -85,6 +87,18 @@ class ProbabilityEntry { return (mFlags & Ver4DictConstants::FLAG_REPRESENTS_BEGINNING_OF_SENTENCE) != 0; } bool isNotAWord() const { return (mFlags & Ver4DictConstants::FLAG_NOT_A_WORD) != 0; } bool isBlacklisted() const { return (mFlags & Ver4DictConstants::FLAG_BLACKLISTED) != 0; } bool isPossiblyOffensive() const { return (mFlags & Ver4DictConstants::FLAG_POSSIBLY_OFFENSIVE) != 0; } uint64_t encode(const bool hasHistoricalInfo) const { uint64_t encodedEntry = static_cast<uint64_t>(mFlags); if (hasHistoricalInfo) { Loading Loading @@ -142,10 +156,20 @@ class ProbabilityEntry { (encodedEntry >> (pos * CHAR_BIT)) & ((1ull << (size * CHAR_BIT)) - 1)); } static uint8_t createFlags(const bool representsBeginningOfSentence) { static uint8_t createFlags(const bool representsBeginningOfSentence, const bool isNotAWord, const bool isBlacklisted, const bool isPossiblyOffensive) { uint8_t flags = 0; if (representsBeginningOfSentence) { flags ^= Ver4DictConstants::FLAG_REPRESENTS_BEGINNING_OF_SENTENCE; flags |= Ver4DictConstants::FLAG_REPRESENTS_BEGINNING_OF_SENTENCE; } if (isNotAWord) { flags |= Ver4DictConstants::FLAG_NOT_A_WORD; } if (isBlacklisted) { flags |= Ver4DictConstants::FLAG_BLACKLISTED; } if (isPossiblyOffensive) { flags |= Ver4DictConstants::FLAG_POSSIBLY_OFFENSIVE; } return flags; } Loading
native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.cpp +3 −0 Original line number Diff line number Diff line Loading @@ -54,6 +54,9 @@ const int Ver4DictConstants::WORD_COUNT_FIELD_SIZE = 1; const uint8_t Ver4DictConstants::FLAG_REPRESENTS_BEGINNING_OF_SENTENCE = 0x1; const uint8_t Ver4DictConstants::FLAG_NOT_A_VALID_ENTRY = 0x2; const uint8_t Ver4DictConstants::FLAG_NOT_A_WORD = 0x4; const uint8_t Ver4DictConstants::FLAG_BLACKLISTED = 0x8; const uint8_t Ver4DictConstants::FLAG_POSSIBLY_OFFENSIVE = 0x10; const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE = 64; const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE = 4; Loading