Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 252412d7 authored by Satoshi Kataoka
Browse files

Use additional multi-word cost per language (for Russian)

Bug: 7540133
Change-Id: I7eb7b8399746c15452ed2ed5069955e88fb546d3
parent 962c9f05
Loading
Loading
Loading
Loading
+12 −0
Original line number Diff line number Diff line
@@ -92,6 +92,7 @@ class BinaryFormat {
            const int unigramProbability, const int bigramProbability);
    static int getProbability(const int position, const std::map<int, int> *bigramMap,
            const uint8_t *bigramFilter, const int unigramProbability);
    static float getMultiWordCostMultiplier(const uint8_t *const dict);

    // Flags for special processing
    // Those *must* match the flags in makedict (BinaryDictInputOutput#*_PROCESSING_FLAG) or
@@ -241,6 +242,17 @@ AK_FORCE_INLINE int BinaryFormat::getGroupCountAndForwardPointer(const uint8_t *
    return ((msb & 0x7F) << 8) | dict[(*pos)++];
}

// Computes the multi-word cost multiplier from the dictionary header attribute
// "MULTIPLE_WORDS_DEMOTION_RATE".
//  - Attribute value equal to S_INT_MIN: returns the neutral multiplier 1.0f.
//    NOTE(review): S_INT_MIN is presumably what readHeaderValueInt yields when the
//    attribute is absent -- confirm against its implementation.
//  - Attribute value zero or negative: returns MAX_VALUE_FOR_WEIGHTING as a float.
//  - Otherwise: returns 100 divided by the attribute value.
inline float BinaryFormat::getMultiWordCostMultiplier(const uint8_t *const dict) {
    const int demotionRate = readHeaderValueInt(dict, "MULTIPLE_WORDS_DEMOTION_RATE");
    if (demotionRate == S_INT_MIN) {
        return 1.0f;
    }
    // A non-positive rate cannot be used as a divisor, so fall back to the maximum weight.
    return (demotionRate > 0)
            ? 100.0f / static_cast<float>(demotionRate)
            : static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
}

// Returns the byte stored at dict[*pos] and advances *pos by one so that it
// refers to the byte immediately after the one that was read.
inline uint8_t BinaryFormat::getFlagsAndForwardPointer(const uint8_t *const dict, int *pos) {
    const int flagsIndex = *pos;
    *pos = flagsIndex + 1;
    return dict[flagsIndex];
}
+2 −3
Original line number Diff line number Diff line
@@ -424,10 +424,9 @@ typedef enum {
    CT_OMISSION,
    CT_INSERTION,
    CT_TRANSPOSITION,
    CT_SPACE_SUBSTITUTION,
    CT_SPACE_OMISSION,
    CT_COMPLETION,
    CT_TERMINAL,
    CT_NEW_WORD,
    CT_NEW_WORD_SPACE_OMITTION,
    CT_NEW_WORD_SPACE_SUBSTITUTION,
} CorrectionType;
#endif // LATINIME_DEFINES_H
+15 −15
Original line number Diff line number Diff line
@@ -38,7 +38,7 @@ static inline void profile(const CorrectionType correctionType, DicNode *const n
    case CT_SUBSTITUTION:
        PROF_SUBSTITUTION(node->mProfiler);
        return;
    case CT_NEW_WORD:
    case CT_NEW_WORD_SPACE_OMITTION:
        PROF_NEW_WORD(node->mProfiler);
        return;
    case CT_MATCH:
@@ -50,7 +50,7 @@ static inline void profile(const CorrectionType correctionType, DicNode *const n
    case CT_TERMINAL:
        PROF_TERMINAL(node->mProfiler);
        return;
    case CT_SPACE_SUBSTITUTION:
    case CT_NEW_WORD_SPACE_SUBSTITUTION:
        PROF_SPACE_SUBSTITUTION(node->mProfiler);
        return;
    case CT_INSERTION:
@@ -107,16 +107,16 @@ static inline void profile(const CorrectionType correctionType, DicNode *const n
    case CT_SUBSTITUTION:
        // only used for typing
        return weighting->getSubstitutionCost();
    case CT_NEW_WORD:
        return weighting->getNewWordCost(dicNode);
    case CT_NEW_WORD_SPACE_OMITTION:
        return weighting->getNewWordCost(traverseSession, dicNode);
    case CT_MATCH:
        return weighting->getMatchedCost(traverseSession, dicNode, inputStateG);
    case CT_COMPLETION:
        return weighting->getCompletionCost(traverseSession, dicNode);
    case CT_TERMINAL:
        return weighting->getTerminalSpatialCost(traverseSession, dicNode);
    case CT_SPACE_SUBSTITUTION:
        return weighting->getSpaceSubstitutionCost();
    case CT_NEW_WORD_SPACE_SUBSTITUTION:
        return weighting->getSpaceSubstitutionCost(traverseSession, dicNode);
    case CT_INSERTION:
        return weighting->getInsertionCost(traverseSession, parentDicNode, dicNode);
    case CT_TRANSPOSITION:
@@ -135,7 +135,7 @@ static inline void profile(const CorrectionType correctionType, DicNode *const n
        return 0.0f;
    case CT_SUBSTITUTION:
        return 0.0f;
    case CT_NEW_WORD:
    case CT_NEW_WORD_SPACE_OMITTION:
        return weighting->getNewWordBigramCost(traverseSession, parentDicNode, bigramCacheMap);
    case CT_MATCH:
        return 0.0f;
@@ -147,8 +147,8 @@ static inline void profile(const CorrectionType correctionType, DicNode *const n
                        traverseSession->getOffsetDict(), dicNode, bigramCacheMap);
        return weighting->getTerminalLanguageCost(traverseSession, dicNode, languageImprobability);
    }
    case CT_SPACE_SUBSTITUTION:
        return 0.0f;
    case CT_NEW_WORD_SPACE_SUBSTITUTION:
        return weighting->getNewWordBigramCost(traverseSession, parentDicNode, bigramCacheMap);
    case CT_INSERTION:
        return 0.0f;
    case CT_TRANSPOSITION:
@@ -168,7 +168,7 @@ static inline void profile(const CorrectionType correctionType, DicNode *const n
        case CT_SUBSTITUTION:
            // Should return true?
            return false;
        case CT_NEW_WORD:
        case CT_NEW_WORD_SPACE_OMITTION:
            return false;
        case CT_MATCH:
            return false;
@@ -176,7 +176,7 @@ static inline void profile(const CorrectionType correctionType, DicNode *const n
            return false;
        case CT_TERMINAL:
            return false;
        case CT_SPACE_SUBSTITUTION:
        case CT_NEW_WORD_SPACE_SUBSTITUTION:
            return false;
        case CT_INSERTION:
            return true;
@@ -197,7 +197,7 @@ static inline void profile(const CorrectionType correctionType, DicNode *const n
            return false;
        case CT_SUBSTITUTION:
            return false;
        case CT_NEW_WORD:
        case CT_NEW_WORD_SPACE_OMITTION:
            return false;
        case CT_MATCH:
            return weighting->isProximityDicNode(traverseSession, dicNode);
@@ -205,7 +205,7 @@ static inline void profile(const CorrectionType correctionType, DicNode *const n
            return false;
        case CT_TERMINAL:
            return false;
        case CT_SPACE_SUBSTITUTION:
        case CT_NEW_WORD_SPACE_SUBSTITUTION:
            return false;
        case CT_INSERTION:
            return false;
@@ -224,7 +224,7 @@ static inline void profile(const CorrectionType correctionType, DicNode *const n
            return 0;
        case CT_SUBSTITUTION:
            return 0;
        case CT_NEW_WORD:
        case CT_NEW_WORD_SPACE_OMITTION:
            return 0;
        case CT_MATCH:
            return 1;
@@ -232,7 +232,7 @@ static inline void profile(const CorrectionType correctionType, DicNode *const n
            return 0;
        case CT_TERMINAL:
            return 0;
        case CT_SPACE_SUBSTITUTION:
        case CT_NEW_WORD_SPACE_SUBSTITUTION:
            return 1;
        case CT_INSERTION:
            return 2;
+4 −2
Original line number Diff line number Diff line
@@ -56,7 +56,8 @@ class Weighting {
            const DicTraverseSession *const traverseSession,
            const DicNode *const parentDicNode, const DicNode *const dicNode) const = 0;

    virtual float getNewWordCost(const DicNode *const dicNode) const = 0;
    virtual float getNewWordCost(const DicTraverseSession *const traverseSession,
            const DicNode *const dicNode) const = 0;

    virtual float getNewWordBigramCost(
            const DicTraverseSession *const traverseSession, const DicNode *const dicNode,
@@ -76,7 +77,8 @@ class Weighting {

    virtual float getSubstitutionCost() const = 0;

    virtual float getSpaceSubstitutionCost() const = 0;
    virtual float getSpaceSubstitutionCost(const DicTraverseSession *const traverseSession,
            const DicNode *const dicNode) const = 0;

    Weighting() {}
    virtual ~Weighting() {}
+2 −0
Original line number Diff line number Diff line
@@ -16,6 +16,7 @@

#include "suggest/core/session/dic_traverse_session.h"

#include "binary_format.h"
#include "defines.h"
#include "dictionary.h"
#include "dic_traverse_wrapper.h"
@@ -63,6 +64,7 @@ static TraverseSessionFactoryRegisterer traverseSessionFactoryRegisterer;
void DicTraverseSession::init(const Dictionary *const dictionary, const int *prevWord,
        int prevWordLength) {
    mDictionary = dictionary;
    mMultiWordCostMultiplier = BinaryFormat::getMultiWordCostMultiplier(mDictionary->getDict());
    if (!prevWord) {
        mPrevWordPos = NOT_VALID_WORD;
        return;
Loading