Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 2111e3ab authored by Keisuke Kuroyanagi
Browse files

Introduce WordAttributes to get word probability and flags.

Bug: 14425059

Change-Id: Iee11d038e0893d7ddd6c52447907f8c55fecb6a5
parent 11a48f92
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -72,10 +72,10 @@ namespace latinime {
    if (dicNode->hasMultipleWords() && !dicNode->isValidMultipleWordSuggestion()) {
        return static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
    }
    const int probability = dictionaryStructurePolicy->getProbabilityOfWordInContext(
    const WordAttributes wordAttributes = dictionaryStructurePolicy->getWordAttributesInContext(
            dicNode->getPrevWordIds(), dicNode->getWordId(), multiBigramMap);
    // TODO: This equation to calculate the improbability looks unreasonable.  Investigate this.
    const float cost = static_cast<float>(MAX_PROBABILITY - probability)
    const float cost = static_cast<float>(MAX_PROBABILITY - wordAttributes.getProbability())
            / static_cast<float>(MAX_PROBABILITY);
    return cost;
}
+4 −3
Original line number Diff line number Diff line
@@ -84,9 +84,10 @@ void Dictionary::NgramListenerForPrediction::onVisitEntry(const int ngramProbabi
    if (codePointCount <= 0) {
        return;
    }
    const int probability = mDictStructurePolicy->getProbabilityOfWordInContext(mPrevWordIds.data(),
            targetWordId, nullptr /* multiBigramMap */);
    mSuggestionResults->addPrediction(targetWordCodePoints, codePointCount, probability);
    const WordAttributes wordAttributes = mDictStructurePolicy->getWordAttributesInContext(
            mPrevWordIds.data(), targetWordId, nullptr /* multiBigramMap */);
    mSuggestionResults->addPrediction(targetWordCodePoints, codePointCount,
            wordAttributes.getProbability());
}

void Dictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo,
+60 −0
Original line number Diff line number Diff line
/*
 * Copyright (C) 2014, The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef LATINIME_WORD_ATTRIBUTES_H
#define LATINIME_WORD_ATTRIBUTES_H

#include "defines.h"

// Value object bundling what a dictionary lookup reports about a word: its probability plus
// the blacklisted / not-a-word / possibly-offensive flags. Only const accessors are exposed,
// so an instance cannot be modified through its public interface once constructed.
class WordAttributes {
 public:
    // Builds the "invalid" attributes: NOT_A_PROBABILITY with every flag cleared.
    WordAttributes() : WordAttributes(NOT_A_PROBABILITY, false, false, false) {}

    // Builds attributes holding exactly the given probability and flags.
    WordAttributes(const int probability, const bool isBlacklisted, const bool isNotAWord,
            const bool isPossiblyOffensive)
            : mProbability(probability),
              mIsBlacklisted(isBlacklisted),
              mIsNotAWord(isNotAWord),
              mIsPossiblyOffensive(isPossiblyOffensive) {}

    int getProbability() const { return mProbability; }
    bool isBlacklisted() const { return mIsBlacklisted; }
    bool isNotAWord() const { return mIsNotAWord; }
    bool isPossiblyOffensive() const { return mIsPossiblyOffensive; }

 private:
    DISALLOW_ASSIGNMENT_OPERATOR(WordAttributes);

    int mProbability;
    bool mIsBlacklisted;
    bool mIsNotAWord;
    bool mIsPossiblyOffensive;
};

 // namespace
#endif /* LATINIME_WORD_ATTRIBUTES_H */
+3 −2
Original line number Diff line number Diff line
@@ -22,6 +22,7 @@
#include "defines.h"
#include "suggest/core/dictionary/binary_dictionary_shortcut_iterator.h"
#include "suggest/core/dictionary/property/word_property.h"
#include "suggest/core/dictionary/word_attributes.h"
#include "utils/int_array_view.h"

namespace latinime {
@@ -57,8 +58,8 @@ class DictionaryStructureWithBufferPolicy {
    virtual int getWordId(const CodePointArrayView wordCodePoints,
            const bool forceLowerCaseSearch) const = 0;

    virtual int getProbabilityOfWordInContext(const int *const prevWordIds, const int wordId,
            MultiBigramMap *const multiBigramMap) const = 0;
    // Returns the attributes of wordId in the context given by prevWordIds. This is the
    // replacement for the probability-only getProbabilityOfWordInContext(); callers that only
    // need the probability read it via WordAttributes::getProbability().
    // multiBigramMap may be null (the prediction path in this change passes nullptr).
    virtual const WordAttributes getWordAttributesInContext(const int *const prevWordIds,
            const int wordId, MultiBigramMap *const multiBigramMap) const = 0;

    // TODO: Remove
    virtual int getProbability(const int unigramProbability, const int bigramProbability) const = 0;
+16 −7
Original line number Diff line number Diff line
@@ -118,24 +118,33 @@ int Ver4PatriciaTriePolicy::getWordId(const CodePointArrayView wordCodePoints,
    return getWordIdFromTerminalPtNodePos(ptNodePos);
}

// NOTE: this hunk is rendered without +/- markers, so each replaced line of the old
// getProbabilityOfWordInContext() is immediately followed by its replacement from the new
// getWordAttributesInContext().
//
// New behavior: returns the WordAttributes of wordId given the previous words.
//  - NOT_A_WORD_ID yields a default-constructed WordAttributes.
//  - With a multiBigramMap, the probability comes from multiBigramMap->getBigramProbability().
//  - Otherwise, when prevWordIds is non-null, getProbabilityOfWord() is tried and its result
//    is used unless it is NOT_A_PROBABILITY.
//  - Failing that, the probability is getProbability(<PtNode probability>, NOT_A_PROBABILITY).
// On every path past the early return, the result is built by getWordAttributes() from the
// chosen probability and the word's terminal PtNode params.
int Ver4PatriciaTriePolicy::getProbabilityOfWordInContext(const int *const prevWordIds,
        const int wordId, MultiBigramMap *const multiBigramMap) const {
const WordAttributes Ver4PatriciaTriePolicy::getWordAttributesInContext(
        const int *const prevWordIds, const int wordId,
        MultiBigramMap *const multiBigramMap) const {
    if (wordId == NOT_A_WORD_ID) {
        return NOT_A_PROBABILITY;
        return WordAttributes();
    }
    // Resolve the word id to its terminal PtNode and read that node's params.
    const int ptNodePos = getTerminalPtNodePosFromWordId(wordId);
    const PtNodeParams ptNodeParams(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos));
    if (multiBigramMap) {
        return multiBigramMap->getBigramProbability(this /* structurePolicy */, prevWordIds,
                wordId, ptNodeParams.getProbability());
        const int probability = multiBigramMap->getBigramProbability(this /* structurePolicy */,
                prevWordIds, wordId, ptNodeParams.getProbability());
        return getWordAttributes(probability, ptNodeParams);
    }
    if (prevWordIds) {
        const int probability = getProbabilityOfWord(prevWordIds, wordId);
        if (probability != NOT_A_PROBABILITY) {
            return probability;
            return getWordAttributes(probability, ptNodeParams);
        }
    }
    // Fallback: derive the probability from the PtNode's own probability only.
    return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
    return getWordAttributes(getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY),
            ptNodeParams);
}

// Packs the given probability together with the flags read from ptNodeParams into a
// WordAttributes value.
const WordAttributes Ver4PatriciaTriePolicy::getWordAttributes(const int probability,
        const PtNodeParams &ptNodeParams) const {
    const bool blacklisted = ptNodeParams.isBlacklisted();
    const bool notAWord = ptNodeParams.isNotAWord();
    // The possibly-offensive flag is derived from the node's own stored probability being 0,
    // not from the probability argument passed in by the caller.
    const bool possiblyOffensive = (ptNodeParams.getProbability() == 0);
    return WordAttributes(probability, blacklisted, notAWord, possiblyOffensive);
}

int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability,
Loading