Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 35c62b2c authored by Keisuke Kuroyanagi
Browse files

Use NgramListener in MultiBigramMap.

Bug: 14425059
Change-Id: I425536290111f2a8172f31370706f858a1e07f6e
parent da5ccd9f
Loading
Loading
Loading
Loading
+4 −7
Original line number Diff line number Diff line
@@ -117,7 +117,7 @@ class DicNode {
        int newPrevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
        newPrevWordsPtNodePos[0] = dicNode->mDicNodeProperties.getPtNodePos();
        for (size_t i = 1; i < NELEMS(newPrevWordsPtNodePos); ++i) {
            newPrevWordsPtNodePos[i] = dicNode->getNthPrevWordTerminalPtNodePos(i);
            newPrevWordsPtNodePos[i] = dicNode->getPrevWordsTerminalPtNodePos()[i - 1];
        }
        mDicNodeProperties.init(rootPtNodeArrayPos, newPrevWordsPtNodePos);
        mDicNodeState.initAsRootWithPreviousWord(&dicNode->mDicNodeState,
@@ -208,12 +208,9 @@ class DicNode {
        return mDicNodeProperties.getPtNodePos();
    }

    // Used to get n-gram probability in DicNodeUtils. n is 1-indexed.
    int getNthPrevWordTerminalPtNodePos(const int n) const {
        if (n <= 0 || n > MAX_PREV_WORD_COUNT_FOR_N_GRAM) {
            return NOT_A_DICT_POS;
        }
        return mDicNodeProperties.getPrevWordsTerminalPtNodePos()[n - 1];
    // TODO: Use view class to return PtNodePos array.
    const int *getPrevWordsTerminalPtNodePos() const {
        return mDicNodeProperties.getPrevWordsTerminalPtNodePos();
    }

    // Used in DicNodeUtils
+2 −9
Original line number Diff line number Diff line
@@ -85,17 +85,10 @@ namespace latinime {
        const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
        const DicNode *const dicNode, MultiBigramMap *const multiBigramMap) {
    const int unigramProbability = dicNode->getProbability();
    const int ptNodePos = dicNode->getPtNodePos();
    const int prevWordTerminalPtNodePos = dicNode->getNthPrevWordTerminalPtNodePos(1 /* n */);
    if (NOT_A_DICT_POS == ptNodePos || NOT_A_DICT_POS == prevWordTerminalPtNodePos) {
        // Note: Normally wordPos comes from the dictionary and should never equal
        // NOT_A_VALID_WORD_POS.
        return dictionaryStructurePolicy->getProbability(unigramProbability,
                NOT_A_PROBABILITY);
    }
    if (multiBigramMap) {
        const int *const prevWordsPtNodePos = dicNode->getPrevWordsTerminalPtNodePos();
        return multiBigramMap->getBigramProbability(dictionaryStructurePolicy,
                prevWordTerminalPtNodePos, ptNodePos, unigramProbability);
                prevWordsPtNodePos, dicNode->getPtNodePos(), unigramProbability);
    }
    return dictionaryStructurePolicy->getProbability(unigramProbability,
            NOT_A_PROBABILITY);
+34 −30
Original line number Diff line number Diff line
@@ -35,34 +35,30 @@ const int MultiBigramMap::BigramMap::DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP =
// Also caches the bigrams if there is space remaining and they have not been cached already.
int MultiBigramMap::getBigramProbability(
        const DictionaryStructureWithBufferPolicy *const structurePolicy,
        const int wordPosition, const int nextWordPosition, const int unigramProbability) {
        const int *const prevWordsPtNodePos, const int nextWordPosition,
        const int unigramProbability) {
    if (!prevWordsPtNodePos || prevWordsPtNodePos[0] == NOT_A_DICT_POS) {
        return structurePolicy->getProbability(unigramProbability, NOT_A_PROBABILITY);
    }
    std::unordered_map<int, BigramMap>::const_iterator mapPosition =
            mBigramMaps.find(wordPosition);
            mBigramMaps.find(prevWordsPtNodePos[0]);
    if (mapPosition != mBigramMaps.end()) {
        return mapPosition->second.getBigramProbability(structurePolicy, nextWordPosition,
                unigramProbability);
    }
    if (mBigramMaps.size() < MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP) {
        addBigramsForWordPosition(structurePolicy, wordPosition);
        return mBigramMaps[wordPosition].getBigramProbability(structurePolicy,
        addBigramsForWordPosition(structurePolicy, prevWordsPtNodePos);
        return mBigramMaps[prevWordsPtNodePos[0]].getBigramProbability(structurePolicy,
                nextWordPosition, unigramProbability);
    }
    return readBigramProbabilityFromBinaryDictionary(structurePolicy, wordPosition,
    return readBigramProbabilityFromBinaryDictionary(structurePolicy, prevWordsPtNodePos,
            nextWordPosition, unigramProbability);
}

void MultiBigramMap::BigramMap::init(
        const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos) {
    BinaryDictionaryBigramsIterator bigramsIt =
            structurePolicy->getBigramsIteratorOfPtNode(nodePos);
    while (bigramsIt.hasNext()) {
        bigramsIt.next();
        if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) {
            continue;
        }
        mBigramMap[bigramsIt.getBigramPos()] = bigramsIt.getProbability();
        mBloomFilter.setInFilter(bigramsIt.getBigramPos());
    }
        const DictionaryStructureWithBufferPolicy *const structurePolicy,
        const int *const prevWordsPtNodePos) {
    structurePolicy->iterateNgramEntries(prevWordsPtNodePos, this /* listener */);
}

int MultiBigramMap::BigramMap::getBigramProbability(
@@ -79,25 +75,33 @@ int MultiBigramMap::BigramMap::getBigramProbability(
    return structurePolicy->getProbability(unigramProbability, bigramProbability);
}

// NgramListener callback. BigramMap::init() passes this object as the listener to
// structurePolicy->iterateNgramEntries(); presumably this is then invoked once per n-gram
// entry found for the given previous words (confirm against the policy implementation).
// ngramProbability: probability stored in the map for the visited entry.
// targetPtNodePos: PtNode position of the entry's target; used as the map key.
void MultiBigramMap::BigramMap::onVisitEntry(const int ngramProbability,
        const int targetPtNodePos) {
    if (targetPtNodePos == NOT_A_DICT_POS) {
        // Entries without a valid target position are not stored.
        return;
    }
    mBigramMap[targetPtNodePos] = ngramProbability;
    // Record the same position in the bloom filter alongside the map entry.
    mBloomFilter.setInFilter(targetPtNodePos);
}

void MultiBigramMap::addBigramsForWordPosition(
        const DictionaryStructureWithBufferPolicy *const structurePolicy, const int position) {
    mBigramMaps[position].init(structurePolicy, position);
        const DictionaryStructureWithBufferPolicy *const structurePolicy,
        const int *const prevWordsPtNodePos) {
    if (prevWordsPtNodePos) {
        mBigramMaps[prevWordsPtNodePos[0]].init(structurePolicy, prevWordsPtNodePos);
    }
}

int MultiBigramMap::readBigramProbabilityFromBinaryDictionary(
        const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos,
        const int nextWordPosition, const int unigramProbability) {
    int bigramProbability = NOT_A_PROBABILITY;
    BinaryDictionaryBigramsIterator bigramsIt =
            structurePolicy->getBigramsIteratorOfPtNode(nodePos);
    while (bigramsIt.hasNext()) {
        bigramsIt.next();
        if (bigramsIt.getBigramPos() == nextWordPosition) {
            bigramProbability = bigramsIt.getProbability();
            break;
        }
        const DictionaryStructureWithBufferPolicy *const structurePolicy,
        const int *const prevWordsPtNodePos, const int nextWordPosition,
        const int unigramProbability) {
    const int bigramProbability = structurePolicy->getProbabilityOfPtNode(prevWordsPtNodePos,
            nextWordPosition);
    if (bigramProbability != NOT_A_PROBABILITY) {
        return bigramProbability;
    }
    return structurePolicy->getProbability(unigramProbability, bigramProbability);
    return structurePolicy->getProbability(unigramProbability, NOT_A_PROBABILITY);
}

} // namespace latinime
+15 −10
Original line number Diff line number Diff line
@@ -23,6 +23,7 @@
#include "defines.h"
#include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
#include "suggest/core/dictionary/bloom_filter.h"
#include "suggest/core/dictionary/ngram_listener.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"

namespace latinime {
@@ -38,7 +39,8 @@ class MultiBigramMap {
    // Look up the bigram probability for the given word pair from the cached bigram maps.
    // Also caches the bigrams if there is space remaining and they have not been cached already.
    int getBigramProbability(const DictionaryStructureWithBufferPolicy *const structurePolicy,
            const int wordPosition, const int nextWordPosition, const int unigramProbability);
            const int *const prevWordsPtNodePos, const int nextWordPosition,
            const int unigramProbability);

    void clear() {
        mBigramMaps.clear();
@@ -47,32 +49,35 @@ class MultiBigramMap {
 private:
    DISALLOW_COPY_AND_ASSIGN(MultiBigramMap);

    class BigramMap {
    class BigramMap : public NgramListener {
     public:
        BigramMap() : mBigramMap(DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP), mBloomFilter() {}
        ~BigramMap() {}
        // Copy constructor needed for std::unordered_map.
        BigramMap(const BigramMap &bigramMap)
                : mBigramMap(bigramMap.mBigramMap), mBloomFilter(bigramMap.mBloomFilter) {}
        virtual ~BigramMap() {}

        void init(const DictionaryStructureWithBufferPolicy *const structurePolicy,
                const int nodePos);

                const int *const prevWordsPtNodePos);
        int getBigramProbability(
                const DictionaryStructureWithBufferPolicy *const structurePolicy,
                const int nextWordPosition, const int unigramProbability) const;
        virtual void onVisitEntry(const int ngramProbability, const int targetPtNodePos);

     private:
        // NOTE: The BigramMap class doesn't use DISALLOW_COPY_AND_ASSIGN() because its default
        // copy constructor is needed for use in hash_map.
        static const int DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP;
        std::unordered_map<int, int> mBigramMap;
        BloomFilter mBloomFilter;
    };

    void addBigramsForWordPosition(
            const DictionaryStructureWithBufferPolicy *const structurePolicy, const int position);
            const DictionaryStructureWithBufferPolicy *const structurePolicy,
            const int *const prevWordsPtNodePos);

    int readBigramProbabilityFromBinaryDictionary(
            const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos,
            const int nextWordPosition, const int unigramProbability);
            const DictionaryStructureWithBufferPolicy *const structurePolicy,
            const int *const prevWordsPtNodePos, const int nextWordPosition,
            const int unigramProbability);

    static const size_t MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP;
    std::unordered_map<int, BigramMap> mBigramMaps;