Loading native/jni/src/suggest/core/dicnode/dic_node_utils.cpp +2 −2 Original line number Diff line number Diff line Loading @@ -72,10 +72,10 @@ namespace latinime { if (dicNode->hasMultipleWords() && !dicNode->isValidMultipleWordSuggestion()) { return static_cast<float>(MAX_VALUE_FOR_WEIGHTING); } const int probability = dictionaryStructurePolicy->getProbabilityOfWordInContext( const WordAttributes wordAttributes = dictionaryStructurePolicy->getWordAttributesInContext( dicNode->getPrevWordIds(), dicNode->getWordId(), multiBigramMap); // TODO: This equation to calculate the improbability looks unreasonable. Investigate this. const float cost = static_cast<float>(MAX_PROBABILITY - probability) const float cost = static_cast<float>(MAX_PROBABILITY - wordAttributes.getProbability()) / static_cast<float>(MAX_PROBABILITY); return cost; } Loading native/jni/src/suggest/core/dictionary/dictionary.cpp +4 −3 Original line number Diff line number Diff line Loading @@ -84,9 +84,10 @@ void Dictionary::NgramListenerForPrediction::onVisitEntry(const int ngramProbabi if (codePointCount <= 0) { return; } const int probability = mDictStructurePolicy->getProbabilityOfWordInContext(mPrevWordIds.data(), targetWordId, nullptr /* multiBigramMap */); mSuggestionResults->addPrediction(targetWordCodePoints, codePointCount, probability); const WordAttributes wordAttributes = mDictStructurePolicy->getWordAttributesInContext( mPrevWordIds.data(), targetWordId, nullptr /* multiBigramMap */); mSuggestionResults->addPrediction(targetWordCodePoints, codePointCount, wordAttributes.getProbability()); } void Dictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo, Loading native/jni/src/suggest/core/dictionary/word_attributes.h 0 → 100644 +60 −0 Original line number Diff line number Diff line /* * Copyright (C) 2014, The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef LATINIME_WORD_ATTRIBUTES_H #define LATINIME_WORD_ATTRIBUTES_H #include "defines.h" class WordAttributes { public: // Invalid word attributes. WordAttributes() : mProbability(NOT_A_PROBABILITY), mIsBlacklisted(false), mIsNotAWord(false), mIsPossiblyOffensive(false) {} WordAttributes(const int probability, const bool isBlacklisted, const bool isNotAWord, const bool isPossiblyOffensive) : mProbability(probability), mIsBlacklisted(isBlacklisted), mIsNotAWord(isNotAWord), mIsPossiblyOffensive(isPossiblyOffensive) {} int getProbability() const { return mProbability; } bool isBlacklisted() const { return mIsBlacklisted; } bool isNotAWord() const { return mIsNotAWord; } bool isPossiblyOffensive() const { return mIsPossiblyOffensive; } private: DISALLOW_ASSIGNMENT_OPERATOR(WordAttributes); int mProbability; bool mIsBlacklisted; bool mIsNotAWord; bool mIsPossiblyOffensive; }; // namespace #endif /* LATINIME_WORD_ATTRIBUTES_H */ native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h +3 −2 Original line number Diff line number Diff line Loading @@ -22,6 +22,7 @@ #include "defines.h" #include "suggest/core/dictionary/binary_dictionary_shortcut_iterator.h" #include "suggest/core/dictionary/property/word_property.h" #include "suggest/core/dictionary/word_attributes.h" #include "utils/int_array_view.h" namespace latinime { Loading Loading @@ -57,8 +58,8 @@ class DictionaryStructureWithBufferPolicy { virtual int getWordId(const CodePointArrayView wordCodePoints, const bool forceLowerCaseSearch) const = 0; virtual int getProbabilityOfWordInContext(const int *const prevWordIds, const int wordId, MultiBigramMap *const multiBigramMap) const = 0; virtual const WordAttributes getWordAttributesInContext(const int *const prevWordIds, const int wordId, MultiBigramMap *const multiBigramMap) const = 0; // TODO: Remove virtual int getProbability(const int unigramProbability, const int bigramProbability) const = 0; Loading native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp +16 −7 Original line number Diff line number Diff line Loading @@ -118,24 +118,33 @@ int Ver4PatriciaTriePolicy::getWordId(const CodePointArrayView wordCodePoints, return getWordIdFromTerminalPtNodePos(ptNodePos); } int Ver4PatriciaTriePolicy::getProbabilityOfWordInContext(const int *const prevWordIds, const int wordId, MultiBigramMap *const multiBigramMap) const { const WordAttributes Ver4PatriciaTriePolicy::getWordAttributesInContext( const int *const prevWordIds, const int wordId, MultiBigramMap *const multiBigramMap) const { if (wordId == NOT_A_WORD_ID) { return NOT_A_PROBABILITY; return WordAttributes(); } const int ptNodePos = getTerminalPtNodePosFromWordId(wordId); const PtNodeParams ptNodeParams(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos)); if (multiBigramMap) { return multiBigramMap->getBigramProbability(this /* structurePolicy */, prevWordIds, wordId, ptNodeParams.getProbability()); const int probability = multiBigramMap->getBigramProbability(this /* structurePolicy */, prevWordIds, wordId, ptNodeParams.getProbability()); return getWordAttributes(probability, ptNodeParams); } if (prevWordIds) { const int probability = getProbabilityOfWord(prevWordIds, wordId); if (probability != NOT_A_PROBABILITY) { return probability; return getWordAttributes(probability, ptNodeParams); } } return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY); return getWordAttributes(getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY), ptNodeParams); } const WordAttributes Ver4PatriciaTriePolicy::getWordAttributes(const int probability, const PtNodeParams &ptNodeParams) const { return WordAttributes(probability, ptNodeParams.isBlacklisted(), ptNodeParams.isNotAWord(), ptNodeParams.getProbability() == 0); } int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability, Loading Loading
native/jni/src/suggest/core/dicnode/dic_node_utils.cpp +2 −2 Original line number Diff line number Diff line Loading @@ -72,10 +72,10 @@ namespace latinime { if (dicNode->hasMultipleWords() && !dicNode->isValidMultipleWordSuggestion()) { return static_cast<float>(MAX_VALUE_FOR_WEIGHTING); } const int probability = dictionaryStructurePolicy->getProbabilityOfWordInContext( const WordAttributes wordAttributes = dictionaryStructurePolicy->getWordAttributesInContext( dicNode->getPrevWordIds(), dicNode->getWordId(), multiBigramMap); // TODO: This equation to calculate the improbability looks unreasonable. Investigate this. const float cost = static_cast<float>(MAX_PROBABILITY - probability) const float cost = static_cast<float>(MAX_PROBABILITY - wordAttributes.getProbability()) / static_cast<float>(MAX_PROBABILITY); return cost; } Loading
native/jni/src/suggest/core/dictionary/dictionary.cpp +4 −3 Original line number Diff line number Diff line Loading @@ -84,9 +84,10 @@ void Dictionary::NgramListenerForPrediction::onVisitEntry(const int ngramProbabi if (codePointCount <= 0) { return; } const int probability = mDictStructurePolicy->getProbabilityOfWordInContext(mPrevWordIds.data(), targetWordId, nullptr /* multiBigramMap */); mSuggestionResults->addPrediction(targetWordCodePoints, codePointCount, probability); const WordAttributes wordAttributes = mDictStructurePolicy->getWordAttributesInContext( mPrevWordIds.data(), targetWordId, nullptr /* multiBigramMap */); mSuggestionResults->addPrediction(targetWordCodePoints, codePointCount, wordAttributes.getProbability()); } void Dictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo, Loading
native/jni/src/suggest/core/dictionary/word_attributes.h 0 → 100644 +60 −0 Original line number Diff line number Diff line /* * Copyright (C) 2014, The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef LATINIME_WORD_ATTRIBUTES_H #define LATINIME_WORD_ATTRIBUTES_H #include "defines.h" class WordAttributes { public: // Invalid word attributes. WordAttributes() : mProbability(NOT_A_PROBABILITY), mIsBlacklisted(false), mIsNotAWord(false), mIsPossiblyOffensive(false) {} WordAttributes(const int probability, const bool isBlacklisted, const bool isNotAWord, const bool isPossiblyOffensive) : mProbability(probability), mIsBlacklisted(isBlacklisted), mIsNotAWord(isNotAWord), mIsPossiblyOffensive(isPossiblyOffensive) {} int getProbability() const { return mProbability; } bool isBlacklisted() const { return mIsBlacklisted; } bool isNotAWord() const { return mIsNotAWord; } bool isPossiblyOffensive() const { return mIsPossiblyOffensive; } private: DISALLOW_ASSIGNMENT_OPERATOR(WordAttributes); int mProbability; bool mIsBlacklisted; bool mIsNotAWord; bool mIsPossiblyOffensive; }; // namespace #endif /* LATINIME_WORD_ATTRIBUTES_H */
native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h +3 −2 Original line number Diff line number Diff line Loading @@ -22,6 +22,7 @@ #include "defines.h" #include "suggest/core/dictionary/binary_dictionary_shortcut_iterator.h" #include "suggest/core/dictionary/property/word_property.h" #include "suggest/core/dictionary/word_attributes.h" #include "utils/int_array_view.h" namespace latinime { Loading Loading @@ -57,8 +58,8 @@ class DictionaryStructureWithBufferPolicy { virtual int getWordId(const CodePointArrayView wordCodePoints, const bool forceLowerCaseSearch) const = 0; virtual int getProbabilityOfWordInContext(const int *const prevWordIds, const int wordId, MultiBigramMap *const multiBigramMap) const = 0; virtual const WordAttributes getWordAttributesInContext(const int *const prevWordIds, const int wordId, MultiBigramMap *const multiBigramMap) const = 0; // TODO: Remove virtual int getProbability(const int unigramProbability, const int bigramProbability) const = 0; Loading
native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp +16 −7 Original line number Diff line number Diff line Loading @@ -118,24 +118,33 @@ int Ver4PatriciaTriePolicy::getWordId(const CodePointArrayView wordCodePoints, return getWordIdFromTerminalPtNodePos(ptNodePos); } int Ver4PatriciaTriePolicy::getProbabilityOfWordInContext(const int *const prevWordIds, const int wordId, MultiBigramMap *const multiBigramMap) const { const WordAttributes Ver4PatriciaTriePolicy::getWordAttributesInContext( const int *const prevWordIds, const int wordId, MultiBigramMap *const multiBigramMap) const { if (wordId == NOT_A_WORD_ID) { return NOT_A_PROBABILITY; return WordAttributes(); } const int ptNodePos = getTerminalPtNodePosFromWordId(wordId); const PtNodeParams ptNodeParams(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos)); if (multiBigramMap) { return multiBigramMap->getBigramProbability(this /* structurePolicy */, prevWordIds, wordId, ptNodeParams.getProbability()); const int probability = multiBigramMap->getBigramProbability(this /* structurePolicy */, prevWordIds, wordId, ptNodeParams.getProbability()); return getWordAttributes(probability, ptNodeParams); } if (prevWordIds) { const int probability = getProbabilityOfWord(prevWordIds, wordId); if (probability != NOT_A_PROBABILITY) { return probability; return getWordAttributes(probability, ptNodeParams); } } return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY); return getWordAttributes(getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY), ptNodeParams); } const WordAttributes Ver4PatriciaTriePolicy::getWordAttributes(const int probability, const PtNodeParams &ptNodeParams) const { return WordAttributes(probability, ptNodeParams.isBlacklisted(), ptNodeParams.isNotAWord(), ptNodeParams.getProbability() == 0); } int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability, Loading