Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit c7ce8add authored by Keisuke Kuroyanagi's avatar Keisuke Kuroyanagi Committed by Android (Google) Code Review
Browse files

Merge "Move prev word related logic to PrevWordsInfo."

parents 23246ed1 45d1a936
Loading
Loading
Loading
Loading
+4 −20
Original line number Diff line number Diff line
@@ -48,21 +48,10 @@ BigramDictionary::~BigramDictionary() {
 */
void BigramDictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo,
        SuggestionResults *const outSuggestionResults) const {
    int pos = getBigramListPositionForWord(prevWordsInfo->getPrevWordCodePoints(),
            prevWordsInfo->getPrevWordCodePointCount(), false /* forceLowerCaseSearch */);
    // getBigramListPositionForWord returns NOT_A_DICT_POS if this word isn't in the dictionary
    // or has no bigrams
    if (NOT_A_DICT_POS == pos) {
        // If no bigrams for this exact word, search again in lower case.
        pos = getBigramListPositionForWord(prevWordsInfo->getPrevWordCodePoints(),
                prevWordsInfo->getPrevWordCodePointCount(), true /* forceLowerCaseSearch */);
    }
    // If still no bigrams, we really don't have them!
    if (NOT_A_DICT_POS == pos) return;

    int unigramProbability = 0;
    int bigramCodePoints[MAX_WORD_LENGTH];
    BinaryDictionaryBigramsIterator bigramsIt(
            mDictionaryStructurePolicy->getBigramsStructurePolicy(), pos);
    BinaryDictionaryBigramsIterator bigramsIt =
            prevWordsInfo->getBigramsIteratorForPrediction(mDictionaryStructurePolicy);
    while (bigramsIt.hasNext()) {
        bigramsIt.next();
        if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) {
@@ -98,16 +87,11 @@ int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const in

int BigramDictionary::getBigramProbability(const PrevWordsInfo *const prevWordsInfo,
        const int *word1, int length1) const {
    int pos = getBigramListPositionForWord(prevWordsInfo->getPrevWordCodePoints(),
            prevWordsInfo->getPrevWordCodePointCount(), false /* forceLowerCaseSearch */);
    // getBigramListPositionForWord returns NOT_A_DICT_POS if this word isn't in the dictionary
    // or has no bigrams
    if (NOT_A_DICT_POS == pos) return NOT_A_PROBABILITY;
    int nextWordPos = mDictionaryStructurePolicy->getTerminalPtNodePositionOfWord(word1, length1,
            false /* forceLowerCaseSearch */);
    if (NOT_A_DICT_POS == nextWordPos) return NOT_A_PROBABILITY;

    BinaryDictionaryBigramsIterator bigramsIt(
            mDictionaryStructurePolicy->getBigramsStructurePolicy(), pos);
    BinaryDictionaryBigramsIterator bigramsIt =
            prevWordsInfo->getBigramsIteratorForPrediction(mDictionaryStructurePolicy);
    while (bigramsIt.hasNext()) {
        bigramsIt.next();
        if (bigramsIt.getBigramPos() == nextWordPos
+5 −0
Original line number Diff line number Diff line
@@ -30,6 +30,11 @@ class BinaryDictionaryBigramsIterator {
              mBigramPos(NOT_A_DICT_POS), mProbability(NOT_A_PROBABILITY),
              mHasNext(pos != NOT_A_DICT_POS) {}

    // Move constructor. Initialises this iterator from bigramsIterator by copying its structure
    // policy pointer, read position, current bigram position, probability and has-next flag.
    // The source object is left unchanged.
    // Declared noexcept because the body only copies member values.
    // NOTE(review): assumes all five members are plain pointer/int/bool fields, as the
    // initialisers of the position-taking constructor suggest - confirm against the class
    // declaration.
    BinaryDictionaryBigramsIterator(BinaryDictionaryBigramsIterator &&bigramsIterator) noexcept
            : mBigramsStructurePolicy(bigramsIterator.mBigramsStructurePolicy),
              mPos(bigramsIterator.mPos), mBigramPos(bigramsIterator.mBigramPos),
              mProbability(bigramsIterator.mProbability), mHasNext(bigramsIterator.mHasNext) {}

    // Returns the stored mHasNext flag. The position-taking constructor initialises that flag to
    // (pos != NOT_A_DICT_POS).
    AK_FORCE_INLINE bool hasNext() const { return mHasNext; }
+2 −15
Original line number Diff line number Diff line
@@ -35,21 +35,8 @@ void DicTraverseSession::init(const Dictionary *const dictionary,
    mMultiWordCostMultiplier = getDictionaryStructurePolicy()->getHeaderStructurePolicy()
            ->getMultiWordCostMultiplier();
    mSuggestOptions = suggestOptions;
    if (!prevWordsInfo->getPrevWordCodePoints()) {
        mPrevWordsPtNodePos[0] = NOT_A_DICT_POS;
        return;
    }
    // TODO: merge following similar calls to getTerminalPosition into one case-insensitive call.
    mPrevWordsPtNodePos[0] = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord(
            prevWordsInfo->getPrevWordCodePoints(), prevWordsInfo->getPrevWordCodePointCount(),
            false /* forceLowerCaseSearch */);
    if (mPrevWordsPtNodePos[0] == NOT_A_DICT_POS) {
        // Check bigrams for lower-cased previous word if original was not found. Useful for
        // auto-capitalized words like "The [current_word]".
        mPrevWordsPtNodePos[0] = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord(
                prevWordsInfo->getPrevWordCodePoints(), prevWordsInfo->getPrevWordCodePointCount(),
                true /* forceLowerCaseSearch */);
    }
    prevWordsInfo->getPrevWordsTerminalPtNodePos(
            getDictionaryStructurePolicy(), mPrevWordsPtNodePos);
}

void DicTraverseSession::setupForGetSuggestions(const ProximityInfo *pInfo,
+53 −4
Original line number Diff line number Diff line
@@ -18,6 +18,8 @@
#define LATINIME_PREV_WORDS_INFO_H

#include "defines.h"
#include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"

namespace latinime {

@@ -38,17 +40,64 @@ class PrevWordsInfo {
        mPrevWordCodePointCount[0] = prevWordCodePointCount;
        mIsBeginningOfSentence[0] = isBeginningOfSentence;
    }
    const int *getPrevWordCodePoints() const {
        return mPrevWordCodePoints[0];

    void getPrevWordsTerminalPtNodePos(
            const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
            int *const outPrevWordsTerminalPtNodePos) const {
        for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) {
            outPrevWordsTerminalPtNodePos[i] = getTerminalPtNodePosOfWord(dictStructurePolicy,
                    mPrevWordCodePoints[i], mPrevWordCodePointCount[i],
                    mIsBeginningOfSentence[i]);
        }
    }

    int getPrevWordCodePointCount() const {
        return mPrevWordCodePointCount[0];
    // Builds a bigram iterator for the first previous word (index 0).
    //
    // The bigram list position is looked up with the word's original casing first. Only when
    // that lookup yields NOT_A_DICT_POS is the lookup repeated with forceLowerCaseSearch set.
    // The iterator is then constructed from the dictionary's bigrams structure policy and
    // whichever position was obtained, which may still be NOT_A_DICT_POS.
    //
    // dictStructurePolicy: dictionary to search; dereferenced without a null check.
    BinaryDictionaryBigramsIterator getBigramsIteratorForPrediction(
            const DictionaryStructureWithBufferPolicy *const dictStructurePolicy) const {
        const int *const prevWordCodePoints = mPrevWordCodePoints[0];
        const int prevWordCodePointCount = mPrevWordCodePointCount[0];
        const int exactCasePos = getBigramListPositionForWord(dictStructurePolicy,
                prevWordCodePoints, prevWordCodePointCount, false /* forceLowerCaseSearch */);
        if (NOT_A_DICT_POS != exactCasePos) {
            // The word as typed has a bigram list; no fallback needed.
            return BinaryDictionaryBigramsIterator(
                    dictStructurePolicy->getBigramsStructurePolicy(), exactCasePos);
        }
        // No bigrams for this exact word, so retry the search in lower case.
        const int lowerCasePos = getBigramListPositionForWord(dictStructurePolicy,
                prevWordCodePoints, prevWordCodePointCount, true /* forceLowerCaseSearch */);
        return BinaryDictionaryBigramsIterator(
                dictStructurePolicy->getBigramsStructurePolicy(), lowerCasePos);
    }

 private:
    DISALLOW_COPY_AND_ASSIGN(PrevWordsInfo);

    // Returns the terminal PtNode position of the given word in the dictionary.
    //
    // The word is searched with its original casing first. If that search yields
    // NOT_A_DICT_POS, the result of a second search with forceLowerCaseSearch set is returned
    // instead. This helps with auto-capitalized previous words like "The [current_word]".
    //
    // Returns NOT_A_DICT_POS immediately when dictStructurePolicy or wordCodePoints is null.
    // isBeginningOfSentence is accepted but not read by this function.
    static int getTerminalPtNodePosOfWord(
            const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
            const int *const wordCodePoints, const int wordCodePointCount,
            const bool isBeginningOfSentence) {
        if (nullptr == dictStructurePolicy || nullptr == wordCodePoints) {
            return NOT_A_DICT_POS;
        }
        const int exactCasePtNodePos = dictStructurePolicy->getTerminalPtNodePositionOfWord(
                wordCodePoints, wordCodePointCount, false /* forceLowerCaseSearch */);
        if (NOT_A_DICT_POS == exactCasePtNodePos) {
            // Original casing was not found; fall back to the lower-cased form.
            return dictStructurePolicy->getTerminalPtNodePositionOfWord(
                    wordCodePoints, wordCodePointCount, true /* forceLowerCaseSearch */);
        }
        return exactCasePtNodePos;
    }

    // Returns the bigram list position for the given word, obtained by resolving the word to
    // its terminal PtNode and asking the dictionary for that node's bigrams position.
    //
    // Returns NOT_A_DICT_POS when wordCodePoints is null, when wordCodePointCount is not
    // positive, or when the word has no terminal PtNode in the dictionary.
    //
    // dictStructurePolicy: dictionary to search; dereferenced without a null check.
    // forceLowerCaseSearch: forwarded unchanged to getTerminalPtNodePositionOfWord.
    static int getBigramListPositionForWord(
            const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
            const int *wordCodePoints, const int wordCodePointCount,
            const bool forceLowerCaseSearch) {
        const bool hasWord = wordCodePoints && wordCodePointCount > 0;
        if (!hasWord) {
            return NOT_A_DICT_POS;
        }
        const int ptNodePos = dictStructurePolicy->getTerminalPtNodePositionOfWord(
                wordCodePoints, wordCodePointCount, forceLowerCaseSearch);
        return (NOT_A_DICT_POS == ptNodePos)
                ? NOT_A_DICT_POS
                : dictStructurePolicy->getBigramsPositionOfPtNode(ptNodePos);
    }

    void clear() {
        for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) {
            mPrevWordCodePoints[i] = nullptr;