Loading native/jni/src/suggest/core/dicnode/dic_node.h +4 −7 Original line number Diff line number Diff line Loading @@ -117,7 +117,7 @@ class DicNode { int newPrevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; newPrevWordsPtNodePos[0] = dicNode->mDicNodeProperties.getPtNodePos(); for (size_t i = 1; i < NELEMS(newPrevWordsPtNodePos); ++i) { newPrevWordsPtNodePos[i] = dicNode->getNthPrevWordTerminalPtNodePos(i); newPrevWordsPtNodePos[i] = dicNode->getPrevWordsTerminalPtNodePos()[i - 1]; } mDicNodeProperties.init(rootPtNodeArrayPos, newPrevWordsPtNodePos); mDicNodeState.initAsRootWithPreviousWord(&dicNode->mDicNodeState, Loading Loading @@ -208,12 +208,9 @@ class DicNode { return mDicNodeProperties.getPtNodePos(); } // Used to get n-gram probability in DicNodeUtils. n is 1-indexed. int getNthPrevWordTerminalPtNodePos(const int n) const { if (n <= 0 || n > MAX_PREV_WORD_COUNT_FOR_N_GRAM) { return NOT_A_DICT_POS; } return mDicNodeProperties.getPrevWordsTerminalPtNodePos()[n - 1]; // TODO: Use view class to return PtNodePos array. const int *getPrevWordsTerminalPtNodePos() const { return mDicNodeProperties.getPrevWordsTerminalPtNodePos(); } // Used in DicNodeUtils Loading native/jni/src/suggest/core/dicnode/dic_node_utils.cpp +2 −9 Original line number Diff line number Diff line Loading @@ -85,17 +85,10 @@ namespace latinime { const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, const DicNode *const dicNode, MultiBigramMap *const multiBigramMap) { const int unigramProbability = dicNode->getProbability(); const int ptNodePos = dicNode->getPtNodePos(); const int prevWordTerminalPtNodePos = dicNode->getNthPrevWordTerminalPtNodePos(1 /* n */); if (NOT_A_DICT_POS == ptNodePos || NOT_A_DICT_POS == prevWordTerminalPtNodePos) { // Note: Normally wordPos comes from the dictionary and should never equal // NOT_A_VALID_WORD_POS. return dictionaryStructurePolicy->getProbability(unigramProbability, NOT_A_PROBABILITY); } if (multiBigramMap) { const int *const prevWordsPtNodePos = dicNode->getPrevWordsTerminalPtNodePos(); return multiBigramMap->getBigramProbability(dictionaryStructurePolicy, prevWordTerminalPtNodePos, ptNodePos, unigramProbability); prevWordsPtNodePos, dicNode->getPtNodePos(), unigramProbability); } return dictionaryStructurePolicy->getProbability(unigramProbability, NOT_A_PROBABILITY); Loading native/jni/src/suggest/core/dictionary/multi_bigram_map.cpp +34 −30 Original line number Diff line number Diff line Loading @@ -35,34 +35,30 @@ const int MultiBigramMap::BigramMap::DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP = // Also caches the bigrams if there is space remaining and they have not been cached already. int MultiBigramMap::getBigramProbability( const DictionaryStructureWithBufferPolicy *const structurePolicy, const int wordPosition, const int nextWordPosition, const int unigramProbability) { const int *const prevWordsPtNodePos, const int nextWordPosition, const int unigramProbability) { if (!prevWordsPtNodePos || prevWordsPtNodePos[0] == NOT_A_DICT_POS) { return structurePolicy->getProbability(unigramProbability, NOT_A_PROBABILITY); } std::unordered_map<int, BigramMap>::const_iterator mapPosition = mBigramMaps.find(wordPosition); mBigramMaps.find(prevWordsPtNodePos[0]); if (mapPosition != mBigramMaps.end()) { return mapPosition->second.getBigramProbability(structurePolicy, nextWordPosition, unigramProbability); } if (mBigramMaps.size() < MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP) { addBigramsForWordPosition(structurePolicy, wordPosition); return mBigramMaps[wordPosition].getBigramProbability(structurePolicy, addBigramsForWordPosition(structurePolicy, prevWordsPtNodePos); return mBigramMaps[prevWordsPtNodePos[0]].getBigramProbability(structurePolicy, nextWordPosition, unigramProbability); } return readBigramProbabilityFromBinaryDictionary(structurePolicy, wordPosition, return readBigramProbabilityFromBinaryDictionary(structurePolicy, prevWordsPtNodePos, nextWordPosition, unigramProbability); } void MultiBigramMap::BigramMap::init( const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos) { BinaryDictionaryBigramsIterator bigramsIt = structurePolicy->getBigramsIteratorOfPtNode(nodePos); while (bigramsIt.hasNext()) { bigramsIt.next(); if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) { continue; } mBigramMap[bigramsIt.getBigramPos()] = bigramsIt.getProbability(); mBloomFilter.setInFilter(bigramsIt.getBigramPos()); } const DictionaryStructureWithBufferPolicy *const structurePolicy, const int *const prevWordsPtNodePos) { structurePolicy->iterateNgramEntries(prevWordsPtNodePos, this /* listener */); } int MultiBigramMap::BigramMap::getBigramProbability( Loading @@ -79,25 +75,33 @@ int MultiBigramMap::BigramMap::getBigramProbability( return structurePolicy->getProbability(unigramProbability, bigramProbability); } void MultiBigramMap::BigramMap::onVisitEntry(const int ngramProbability, const int targetPtNodePos) { if (targetPtNodePos == NOT_A_DICT_POS) { return; } mBigramMap[targetPtNodePos] = ngramProbability; mBloomFilter.setInFilter(targetPtNodePos); } void MultiBigramMap::addBigramsForWordPosition( const DictionaryStructureWithBufferPolicy *const structurePolicy, const int position) { mBigramMaps[position].init(structurePolicy, position); const DictionaryStructureWithBufferPolicy *const structurePolicy, const int *const prevWordsPtNodePos) { if (prevWordsPtNodePos) { mBigramMaps[prevWordsPtNodePos[0]].init(structurePolicy, prevWordsPtNodePos); } } int MultiBigramMap::readBigramProbabilityFromBinaryDictionary( const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos, const int nextWordPosition, const int unigramProbability) { int bigramProbability = NOT_A_PROBABILITY; BinaryDictionaryBigramsIterator bigramsIt = structurePolicy->getBigramsIteratorOfPtNode(nodePos); while (bigramsIt.hasNext()) { bigramsIt.next(); if (bigramsIt.getBigramPos() == nextWordPosition) { bigramProbability = bigramsIt.getProbability(); break; } const DictionaryStructureWithBufferPolicy *const structurePolicy, const int *const prevWordsPtNodePos, const int nextWordPosition, const int unigramProbability) { const int bigramProbability = structurePolicy->getProbabilityOfPtNode(prevWordsPtNodePos, nextWordPosition); if (bigramProbability != NOT_A_PROBABILITY) { return bigramProbability; } return structurePolicy->getProbability(unigramProbability, bigramProbability); return structurePolicy->getProbability(unigramProbability, NOT_A_PROBABILITY); } } // namespace latinime native/jni/src/suggest/core/dictionary/multi_bigram_map.h +15 −10 Original line number Diff line number Diff line Loading @@ -23,6 +23,7 @@ #include "defines.h" #include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h" #include "suggest/core/dictionary/bloom_filter.h" #include "suggest/core/dictionary/ngram_listener.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" namespace latinime { Loading @@ -38,7 +39,8 @@ class MultiBigramMap { // Look up the bigram probability for the given word pair from the cached bigram maps. // Also caches the bigrams if there is space remaining and they have not been cached already. int getBigramProbability(const DictionaryStructureWithBufferPolicy *const structurePolicy, const int wordPosition, const int nextWordPosition, const int unigramProbability); const int *const prevWordsPtNodePos, const int nextWordPosition, const int unigramProbability); void clear() { mBigramMaps.clear(); Loading @@ -47,32 +49,35 @@ class MultiBigramMap { private: DISALLOW_COPY_AND_ASSIGN(MultiBigramMap); class BigramMap { class BigramMap : public NgramListener { public: BigramMap() : mBigramMap(DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP), mBloomFilter() {} ~BigramMap() {} // Copy constructor needed for std::unordered_map. BigramMap(const BigramMap &bigramMap) : mBigramMap(bigramMap.mBigramMap), mBloomFilter(bigramMap.mBloomFilter) {} virtual ~BigramMap() {} void init(const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos); const int *const prevWordsPtNodePos); int getBigramProbability( const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nextWordPosition, const int unigramProbability) const; virtual void onVisitEntry(const int ngramProbability, const int targetPtNodePos); private: // NOTE: The BigramMap class doesn't use DISALLOW_COPY_AND_ASSIGN() because its default // copy constructor is needed for use in hash_map. static const int DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP; std::unordered_map<int, int> mBigramMap; BloomFilter mBloomFilter; }; void addBigramsForWordPosition( const DictionaryStructureWithBufferPolicy *const structurePolicy, const int position); const DictionaryStructureWithBufferPolicy *const structurePolicy, const int *const prevWordsPtNodePos); int readBigramProbabilityFromBinaryDictionary( const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos, const int nextWordPosition, const int unigramProbability); const DictionaryStructureWithBufferPolicy *const structurePolicy, const int *const prevWordsPtNodePos, const int nextWordPosition, const int unigramProbability); static const size_t MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP; std::unordered_map<int, BigramMap> mBigramMaps; Loading Loading
native/jni/src/suggest/core/dicnode/dic_node.h +4 −7 Original line number Diff line number Diff line Loading @@ -117,7 +117,7 @@ class DicNode { int newPrevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; newPrevWordsPtNodePos[0] = dicNode->mDicNodeProperties.getPtNodePos(); for (size_t i = 1; i < NELEMS(newPrevWordsPtNodePos); ++i) { newPrevWordsPtNodePos[i] = dicNode->getNthPrevWordTerminalPtNodePos(i); newPrevWordsPtNodePos[i] = dicNode->getPrevWordsTerminalPtNodePos()[i - 1]; } mDicNodeProperties.init(rootPtNodeArrayPos, newPrevWordsPtNodePos); mDicNodeState.initAsRootWithPreviousWord(&dicNode->mDicNodeState, Loading Loading @@ -208,12 +208,9 @@ class DicNode { return mDicNodeProperties.getPtNodePos(); } // Used to get n-gram probability in DicNodeUtils. n is 1-indexed. int getNthPrevWordTerminalPtNodePos(const int n) const { if (n <= 0 || n > MAX_PREV_WORD_COUNT_FOR_N_GRAM) { return NOT_A_DICT_POS; } return mDicNodeProperties.getPrevWordsTerminalPtNodePos()[n - 1]; // TODO: Use view class to return PtNodePos array. const int *getPrevWordsTerminalPtNodePos() const { return mDicNodeProperties.getPrevWordsTerminalPtNodePos(); } // Used in DicNodeUtils Loading
native/jni/src/suggest/core/dicnode/dic_node_utils.cpp +2 −9 Original line number Diff line number Diff line Loading @@ -85,17 +85,10 @@ namespace latinime { const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, const DicNode *const dicNode, MultiBigramMap *const multiBigramMap) { const int unigramProbability = dicNode->getProbability(); const int ptNodePos = dicNode->getPtNodePos(); const int prevWordTerminalPtNodePos = dicNode->getNthPrevWordTerminalPtNodePos(1 /* n */); if (NOT_A_DICT_POS == ptNodePos || NOT_A_DICT_POS == prevWordTerminalPtNodePos) { // Note: Normally wordPos comes from the dictionary and should never equal // NOT_A_VALID_WORD_POS. return dictionaryStructurePolicy->getProbability(unigramProbability, NOT_A_PROBABILITY); } if (multiBigramMap) { const int *const prevWordsPtNodePos = dicNode->getPrevWordsTerminalPtNodePos(); return multiBigramMap->getBigramProbability(dictionaryStructurePolicy, prevWordTerminalPtNodePos, ptNodePos, unigramProbability); prevWordsPtNodePos, dicNode->getPtNodePos(), unigramProbability); } return dictionaryStructurePolicy->getProbability(unigramProbability, NOT_A_PROBABILITY); Loading
native/jni/src/suggest/core/dictionary/multi_bigram_map.cpp +34 −30 Original line number Diff line number Diff line Loading @@ -35,34 +35,30 @@ const int MultiBigramMap::BigramMap::DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP = // Also caches the bigrams if there is space remaining and they have not been cached already. int MultiBigramMap::getBigramProbability( const DictionaryStructureWithBufferPolicy *const structurePolicy, const int wordPosition, const int nextWordPosition, const int unigramProbability) { const int *const prevWordsPtNodePos, const int nextWordPosition, const int unigramProbability) { if (!prevWordsPtNodePos || prevWordsPtNodePos[0] == NOT_A_DICT_POS) { return structurePolicy->getProbability(unigramProbability, NOT_A_PROBABILITY); } std::unordered_map<int, BigramMap>::const_iterator mapPosition = mBigramMaps.find(wordPosition); mBigramMaps.find(prevWordsPtNodePos[0]); if (mapPosition != mBigramMaps.end()) { return mapPosition->second.getBigramProbability(structurePolicy, nextWordPosition, unigramProbability); } if (mBigramMaps.size() < MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP) { addBigramsForWordPosition(structurePolicy, wordPosition); return mBigramMaps[wordPosition].getBigramProbability(structurePolicy, addBigramsForWordPosition(structurePolicy, prevWordsPtNodePos); return mBigramMaps[prevWordsPtNodePos[0]].getBigramProbability(structurePolicy, nextWordPosition, unigramProbability); } return readBigramProbabilityFromBinaryDictionary(structurePolicy, wordPosition, return readBigramProbabilityFromBinaryDictionary(structurePolicy, prevWordsPtNodePos, nextWordPosition, unigramProbability); } void MultiBigramMap::BigramMap::init( const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos) { BinaryDictionaryBigramsIterator bigramsIt = structurePolicy->getBigramsIteratorOfPtNode(nodePos); while (bigramsIt.hasNext()) { bigramsIt.next(); if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) { continue; } mBigramMap[bigramsIt.getBigramPos()] = bigramsIt.getProbability(); mBloomFilter.setInFilter(bigramsIt.getBigramPos()); } const DictionaryStructureWithBufferPolicy *const structurePolicy, const int *const prevWordsPtNodePos) { structurePolicy->iterateNgramEntries(prevWordsPtNodePos, this /* listener */); } int MultiBigramMap::BigramMap::getBigramProbability( Loading @@ -79,25 +75,33 @@ int MultiBigramMap::BigramMap::getBigramProbability( return structurePolicy->getProbability(unigramProbability, bigramProbability); } void MultiBigramMap::BigramMap::onVisitEntry(const int ngramProbability, const int targetPtNodePos) { if (targetPtNodePos == NOT_A_DICT_POS) { return; } mBigramMap[targetPtNodePos] = ngramProbability; mBloomFilter.setInFilter(targetPtNodePos); } void MultiBigramMap::addBigramsForWordPosition( const DictionaryStructureWithBufferPolicy *const structurePolicy, const int position) { mBigramMaps[position].init(structurePolicy, position); const DictionaryStructureWithBufferPolicy *const structurePolicy, const int *const prevWordsPtNodePos) { if (prevWordsPtNodePos) { mBigramMaps[prevWordsPtNodePos[0]].init(structurePolicy, prevWordsPtNodePos); } } int MultiBigramMap::readBigramProbabilityFromBinaryDictionary( const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos, const int nextWordPosition, const int unigramProbability) { int bigramProbability = NOT_A_PROBABILITY; BinaryDictionaryBigramsIterator bigramsIt = structurePolicy->getBigramsIteratorOfPtNode(nodePos); while (bigramsIt.hasNext()) { bigramsIt.next(); if (bigramsIt.getBigramPos() == nextWordPosition) { bigramProbability = bigramsIt.getProbability(); break; } const DictionaryStructureWithBufferPolicy *const structurePolicy, const int *const prevWordsPtNodePos, const int nextWordPosition, const int unigramProbability) { const int bigramProbability = structurePolicy->getProbabilityOfPtNode(prevWordsPtNodePos, nextWordPosition); if (bigramProbability != NOT_A_PROBABILITY) { return bigramProbability; } return structurePolicy->getProbability(unigramProbability, bigramProbability); return structurePolicy->getProbability(unigramProbability, NOT_A_PROBABILITY); } } // namespace latinime
native/jni/src/suggest/core/dictionary/multi_bigram_map.h +15 −10 Original line number Diff line number Diff line Loading @@ -23,6 +23,7 @@ #include "defines.h" #include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h" #include "suggest/core/dictionary/bloom_filter.h" #include "suggest/core/dictionary/ngram_listener.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" namespace latinime { Loading @@ -38,7 +39,8 @@ class MultiBigramMap { // Look up the bigram probability for the given word pair from the cached bigram maps. // Also caches the bigrams if there is space remaining and they have not been cached already. int getBigramProbability(const DictionaryStructureWithBufferPolicy *const structurePolicy, const int wordPosition, const int nextWordPosition, const int unigramProbability); const int *const prevWordsPtNodePos, const int nextWordPosition, const int unigramProbability); void clear() { mBigramMaps.clear(); Loading @@ -47,32 +49,35 @@ class MultiBigramMap { private: DISALLOW_COPY_AND_ASSIGN(MultiBigramMap); class BigramMap { class BigramMap : public NgramListener { public: BigramMap() : mBigramMap(DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP), mBloomFilter() {} ~BigramMap() {} // Copy constructor needed for std::unordered_map. BigramMap(const BigramMap &bigramMap) : mBigramMap(bigramMap.mBigramMap), mBloomFilter(bigramMap.mBloomFilter) {} virtual ~BigramMap() {} void init(const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos); const int *const prevWordsPtNodePos); int getBigramProbability( const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nextWordPosition, const int unigramProbability) const; virtual void onVisitEntry(const int ngramProbability, const int targetPtNodePos); private: // NOTE: The BigramMap class doesn't use DISALLOW_COPY_AND_ASSIGN() because its default // copy constructor is needed for use in hash_map. static const int DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP; std::unordered_map<int, int> mBigramMap; BloomFilter mBloomFilter; }; void addBigramsForWordPosition( const DictionaryStructureWithBufferPolicy *const structurePolicy, const int position); const DictionaryStructureWithBufferPolicy *const structurePolicy, const int *const prevWordsPtNodePos); int readBigramProbabilityFromBinaryDictionary( const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos, const int nextWordPosition, const int unigramProbability); const DictionaryStructureWithBufferPolicy *const structurePolicy, const int *const prevWordsPtNodePos, const int nextWordPosition, const int unigramProbability); static const size_t MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP; std::unordered_map<int, BigramMap> mBigramMaps; Loading