Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit b6e1777d authored by Tom Ouyang's avatar Tom Ouyang Committed by Android (Google) Code Review
Browse files

Merge "Add mechanism to handle digraphs in DicNode"

parents 099dd3f2 25e8eda9
Loading
Loading
Loading
Loading
+57 −17
Original line number Diff line number Diff line
@@ -27,39 +27,47 @@ const DigraphUtils::digraph_t DigraphUtils::GERMAN_UMLAUT_DIGRAPHS[] =
// Digraph table for DIGRAPH_TYPE_FRENCH_LIGATURES.
// Each entry is { first, second, compositeGlyph }.
const DigraphUtils::digraph_t DigraphUtils::FRENCH_LIGATURES_DIGRAPHS[] = {
        { 'a', 'e', 0x00E6 }, // U+00E6 : LATIN SMALL LETTER AE
        { 'o', 'e', 0x0153 }, // U+0153 : LATIN SMALL LIGATURE OE
};
// Digraph types that are searched, in this order, when a lookup iterates over every
// supported type instead of a single dictionary's type.
const DigraphUtils::DigraphType DigraphUtils::USED_DIGRAPH_TYPES[] = {
        DIGRAPH_TYPE_GERMAN_UMLAUT,
        DIGRAPH_TYPE_FRENCH_LIGATURES,
};

// Reports whether a digraph is defined for compositeGlyphCodePoint under the digraph type
// selected by dictFlags.
// dictFlags: dictionary flags, resolved to a digraph type via getDigraphTypeForDictionary().
// compositeGlyphCodePoint: the composite glyph code point to look up.
// Returns true when the lookup yields a digraph entry, false otherwise.
// NOTE(review): this span comes from a rendered diff and carries two lookup `if` lines, the
// dictFlags-based one and the digraphType-based one; presumably only one of them belongs to
// any single revision of the file - confirm before compiling.
/* static */ bool DigraphUtils::hasDigraphForCodePoint(
        const int dictFlags, const int compositeGlyphCodePoint) {
    if (DigraphUtils::getDigraphForCodePoint(dictFlags, compositeGlyphCodePoint)) {
    const DigraphUtils::DigraphType digraphType = getDigraphTypeForDictionary(dictFlags);
    if (DigraphUtils::getDigraphForDigraphTypeAndCodePoint(digraphType, compositeGlyphCodePoint)) {
        return true;
    }
    return false;
}

// NOTE(review): this span comes from a rendered diff and interleaves two functions: a
// getAllDigraphsForDictionaryAndReturnSize() variant that fills *digraphs and returns an
// array size, and getDigraphTypeForDictionary(), which returns a DigraphType. Presumably the
// `*digraphs = ...`, `return NELEMS(...)` and `return 0;` lines belong to the former only.
// Retrieves the set of all digraphs associated with the given dictionary.
// Returns the size of the digraph array, or 0 if none exist.
/* static */ int DigraphUtils::getAllDigraphsForDictionaryAndReturnSize(
        const int dictFlags, const DigraphUtils::digraph_t **digraphs) {
// Returns the digraph type associated with the given dictionary.
// dictFlags: dictionary flags; BinaryFormat::REQUIRES_GERMAN_UMLAUT_PROCESSING is tested
// first, then BinaryFormat::REQUIRES_FRENCH_LIGATURES_PROCESSING.
// Returns DIGRAPH_TYPE_NONE when neither flag is set.
/* static */ DigraphUtils::DigraphType DigraphUtils::getDigraphTypeForDictionary(
        const int dictFlags) {
    if (BinaryFormat::REQUIRES_GERMAN_UMLAUT_PROCESSING & dictFlags) {
        *digraphs = DigraphUtils::GERMAN_UMLAUT_DIGRAPHS;
        return NELEMS(DigraphUtils::GERMAN_UMLAUT_DIGRAPHS);
        return DIGRAPH_TYPE_GERMAN_UMLAUT;
    }
    if (BinaryFormat::REQUIRES_FRENCH_LIGATURES_PROCESSING & dictFlags) {
        *digraphs = DigraphUtils::FRENCH_LIGATURES_DIGRAPHS;
        return NELEMS(DigraphUtils::FRENCH_LIGATURES_DIGRAPHS);
        return DIGRAPH_TYPE_FRENCH_LIGATURES;
    }
    return 0;
    return DIGRAPH_TYPE_NONE;
}

// Looks up the digraph table for a dictionary identified by its flags.
// dictFlags: dictionary flags, resolved to a digraph type by getDigraphTypeForDictionary().
// digraphs: out-parameter forwarded to getAllDigraphsForDigraphTypeAndReturnSize().
// Returns the size of the digraph array, or 0 if none exist.
/* static */ int DigraphUtils::getAllDigraphsForDictionaryAndReturnSize(
        const int dictFlags, const DigraphUtils::digraph_t **const digraphs) {
    return getAllDigraphsForDigraphTypeAndReturnSize(
            getDigraphTypeForDictionary(dictFlags), digraphs);
}

// Returns the digraph codepoint for the given composite glyph codepoint and digraph codepoint index
// (which specifies the first or second codepoint in the digraph).
/* static */ int DigraphUtils::getDigraphCodePointForIndex(const int dictFlags,
        const int compositeGlyphCodePoint, const DigraphCodePointIndex digraphCodePointIndex) {
/* static */ int DigraphUtils::getDigraphCodePointForIndex(const int compositeGlyphCodePoint,
        const DigraphCodePointIndex digraphCodePointIndex) {
    if (digraphCodePointIndex == NOT_A_DIGRAPH_INDEX) {
        return NOT_A_CODE_POINT;
    }
    const DigraphUtils::digraph_t *digraph =
            DigraphUtils::getDigraphForCodePoint(dictFlags, compositeGlyphCodePoint);
    const DigraphUtils::digraph_t *const digraph =
            DigraphUtils::getDigraphForCodePoint(compositeGlyphCodePoint);
    if (!digraph) {
        return NOT_A_CODE_POINT;
    }
@@ -72,16 +80,48 @@ const DigraphUtils::digraph_t DigraphUtils::FRENCH_LIGATURES_DIGRAPHS[] =
    return NOT_A_CODE_POINT;
}

// Looks up the digraph table that belongs to a digraph type.
// digraphType: the digraph type whose table is requested.
// digraphs: out-parameter; pointed at the matching table, left untouched when there is none.
// Returns the number of entries in the table, or 0 when the type has no table.
/* static */ int DigraphUtils::getAllDigraphsForDigraphTypeAndReturnSize(
        const DigraphUtils::DigraphType digraphType,
        const DigraphUtils::digraph_t **const digraphs) {
    int tableSize = 0;
    switch (digraphType) {
        case DIGRAPH_TYPE_GERMAN_UMLAUT:
            *digraphs = GERMAN_UMLAUT_DIGRAPHS;
            tableSize = NELEMS(GERMAN_UMLAUT_DIGRAPHS);
            break;
        case DIGRAPH_TYPE_FRENCH_LIGATURES:
            *digraphs = FRENCH_LIGATURES_DIGRAPHS;
            tableSize = NELEMS(FRENCH_LIGATURES_DIGRAPHS);
            break;
        default:
            // Any other type has no table; *digraphs is deliberately not written.
            break;
    }
    return tableSize;
}

/**
 * Returns the digraph for the input composite glyph codepoint, or 0 if none exists.
 * Every type listed in USED_DIGRAPH_TYPES is searched in array order and the first match is
 * returned.
 * dictFlags: the dictionary flags needed to determine which digraphs are supported.
 *         NOTE(review): only the first of the two parameter lines below declares dictFlags,
 *         and the loop never reads it; this span comes from a rendered diff, so that line is
 *         presumably the superseded signature - confirm, and drop this entry if it is stale.
 * compositeGlyphCodePoint: the method returns the digraph corresponding to this codepoint.
 */
/* static */ const DigraphUtils::digraph_t *DigraphUtils::getDigraphForCodePoint(
        const int dictFlags, const int compositeGlyphCodePoint) {
        const int compositeGlyphCodePoint) {
    for (size_t i = 0; i < NELEMS(USED_DIGRAPH_TYPES); i++) {
        const DigraphUtils::digraph_t *const digraph = getDigraphForDigraphTypeAndCodePoint(
                USED_DIGRAPH_TYPES[i], compositeGlyphCodePoint);
        if (digraph) {
            return digraph;
        }
    }
    return 0;
}

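/**
 * Returns the digraph for the input composite glyph codepoint, or 0 if none exists.
 * digraphType: the type of digraphs supported.
 * compositeGlyphCodePoint: the method returns the digraph corresponding to this codepoint.
 * NOTE(review): the lookup below hands digraphType to
 * getAllDigraphsForDictionaryAndReturnSize(), whose first parameter is dictFlags (a flag
 * bitmask); the type-keyed getAllDigraphsForDigraphTypeAndReturnSize() looks like the
 * intended callee.
 */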
/* static */ const DigraphUtils::digraph_t *DigraphUtils::getDigraphForDigraphTypeAndCodePoint(
        const DigraphUtils::DigraphType digraphType, const int compositeGlyphCodePoint) {
    const DigraphUtils::digraph_t *digraphs = 0;
    const int digraphsSize =
            DigraphUtils::getAllDigraphsForDictionaryAndReturnSize(dictFlags, &digraphs);
            DigraphUtils::getAllDigraphsForDictionaryAndReturnSize(digraphType, &digraphs);
    for (int i = 0; i < digraphsSize; i++) {
        if (digraphs[i].compositeGlyph == compositeGlyphCodePoint) {
            return &digraphs[i];
+16 −3
Original line number Diff line number Diff line
@@ -27,21 +27,34 @@ class DigraphUtils {
        SECOND_DIGRAPH_CODEPOINT
    } DigraphCodePointIndex;

    // Selects which digraph table applies. DIGRAPH_TYPE_NONE has no table.
    typedef enum {
        DIGRAPH_TYPE_NONE = 0,
        DIGRAPH_TYPE_GERMAN_UMLAUT = 1,
        DIGRAPH_TYPE_FRENCH_LIGATURES = 2
    } DigraphType;

    // One digraph table entry: the code points of the two letters of the pair, and the code
    // point of the single composite glyph they correspond to.
    typedef struct {
        int first;
        int second;
        int compositeGlyph;
    } digraph_t;

    static bool hasDigraphForCodePoint(const int dictFlags, const int compositeGlyphCodePoint);
    static int getAllDigraphsForDictionaryAndReturnSize(
            const int dictFlags, const digraph_t **digraphs);
            const int dictFlags, const digraph_t **const digraphs);
    static int getDigraphCodePointForIndex(const int dictFlags, const int compositeGlyphCodePoint,
            const DigraphCodePointIndex digraphCodePointIndex);
    static int getDigraphCodePointForIndex(const int compositeGlyphCodePoint,
            const DigraphCodePointIndex digraphCodePointIndex);

 private:
    DISALLOW_IMPLICIT_CONSTRUCTORS(DigraphUtils);
    static const digraph_t *getDigraphForCodePoint(
            const int dictFlags, const int compositeGlyphCodePoint);
    static DigraphType getDigraphTypeForDictionary(const int dictFlags);
    static int getAllDigraphsForDigraphTypeAndReturnSize(
            const DigraphType digraphType, const digraph_t **const digraphs);
    static const digraph_t *getDigraphForCodePoint(const int compositeGlyphCodePoint);
    static const digraph_t *getDigraphForDigraphTypeAndCodePoint(
            const DigraphType digraphType, const int compositeGlyphCodePoint);

    static const digraph_t GERMAN_UMLAUT_DIGRAPHS[];
    static const digraph_t FRENCH_LIGATURES_DIGRAPHS[];
    static const DigraphType USED_DIGRAPH_TYPES[];
};
} // namespace latinime
#endif // DIGRAPH_UTILS_H
+18 −1
Original line number Diff line number Diff line
@@ -23,6 +23,7 @@
#include "dic_node_profiler.h"
#include "dic_node_properties.h"
#include "dic_node_release_listener.h"
#include "digraph_utils.h"

#if DEBUG_DICT
#define LOGI_SHOW_ADD_COST_PROP \
@@ -399,8 +400,15 @@ class DicNode {
    // TODO: Remove     //
    //////////////////////
    // TODO: Remove once touch path is merged into ProximityInfoState
    // Note: Returned codepoint may be a digraph codepoint if the node is in a composite glyph.
    // When the scoring state's digraph index is NOT_A_DIGRAPH_INDEX the codepoint from
    // mDicNodeProperties is returned unchanged; otherwise it is mapped through
    // DigraphUtils::getDigraphCodePointForIndex() for that index.
    // NOTE(review): this span comes from a rendered diff; the bare
    // `return mDicNodeProperties.getNodeCodePoint();` line is presumably the replaced body, as
    // it would make everything after it unreachable - confirm.
    int getNodeCodePoint() const {
        return mDicNodeProperties.getNodeCodePoint();
        const int codePoint = mDicNodeProperties.getNodeCodePoint();
        const DigraphUtils::DigraphCodePointIndex digraphIndex =
                mDicNodeState.mDicNodeStateScoring.getDigraphIndex();
        if (digraphIndex == DigraphUtils::NOT_A_DIGRAPH_INDEX) {
            return codePoint;
        }
        return DigraphUtils::getDigraphCodePointForIndex(codePoint, digraphIndex);
    }

    ////////////////////////////////
@@ -452,6 +460,15 @@ class DicNode {
        mDicNodeState.mDicNodeStateScoring.setDoubleLetterLevel(doubleLetterLevel);
    }

    bool isInDigraph() const {
        return mDicNodeState.mDicNodeStateScoring.getDigraphIndex()
                != DigraphUtils::NOT_A_DIGRAPH_INDEX;
    }

    // Advances this node's digraph index by delegating to the scoring state's
    // advanceDigraphIndex().
    void advanceDigraphIndex() {
        mDicNodeState.mDicNodeStateScoring.advanceDigraphIndex();
    }

    // Returns the flags reported by mDicNodeProperties.
    uint8_t getFlags() const {
        const uint8_t flags = mDicNodeProperties.getFlags();
        return flags;
    }
+23 −0
Original line number Diff line number Diff line
@@ -20,6 +20,7 @@
#include <stdint.h>

#include "defines.h"
#include "digraph_utils.h"

namespace latinime {

@@ -27,6 +28,7 @@ class DicNodeStateScoring {
 public:
    AK_FORCE_INLINE DicNodeStateScoring()
            : mDoubleLetterLevel(NOT_A_DOUBLE_LETTER),
              mDigraphIndex(DigraphUtils::NOT_A_DIGRAPH_INDEX),
              mEditCorrectionCount(0), mProximityCorrectionCount(0),
              mNormalizedCompoundDistance(0.0f), mSpatialDistance(0.0f), mLanguageDistance(0.0f),
              mTotalPrevWordsLanguageCost(0.0f), mRawLength(0.0f) {
@@ -43,6 +45,7 @@ class DicNodeStateScoring {
        mTotalPrevWordsLanguageCost = 0.0f;
        mRawLength = 0.0f;
        mDoubleLetterLevel = NOT_A_DOUBLE_LETTER;
        mDigraphIndex = DigraphUtils::NOT_A_DIGRAPH_INDEX;
    }

    AK_FORCE_INLINE void init(const DicNodeStateScoring *const scoring) {
@@ -54,6 +57,7 @@ class DicNodeStateScoring {
        mTotalPrevWordsLanguageCost = scoring->mTotalPrevWordsLanguageCost;
        mRawLength = scoring->mRawLength;
        mDoubleLetterLevel = scoring->mDoubleLetterLevel;
        mDigraphIndex = scoring->mDigraphIndex;
    }

    void addCost(const float spatialCost, const float languageCost, const bool doNormalization,
@@ -126,6 +130,24 @@ class DicNodeStateScoring {
        }
    }

    // Returns the current digraph index (mDigraphIndex) without modifying it.
    DigraphUtils::DigraphCodePointIndex getDigraphIndex() const {
        return mDigraphIndex;
    }

    // Steps mDigraphIndex through its cycle:
    // NOT_A_DIGRAPH_INDEX -> FIRST_DIGRAPH_CODEPOINT -> SECOND_DIGRAPH_CODEPOINT
    // -> NOT_A_DIGRAPH_INDEX. Any other value is left as it is.
    void advanceDigraphIndex() {
        if (mDigraphIndex == DigraphUtils::NOT_A_DIGRAPH_INDEX) {
            mDigraphIndex = DigraphUtils::FIRST_DIGRAPH_CODEPOINT;
        } else if (mDigraphIndex == DigraphUtils::FIRST_DIGRAPH_CODEPOINT) {
            mDigraphIndex = DigraphUtils::SECOND_DIGRAPH_CODEPOINT;
        } else if (mDigraphIndex == DigraphUtils::SECOND_DIGRAPH_CODEPOINT) {
            mDigraphIndex = DigraphUtils::NOT_A_DIGRAPH_INDEX;
        }
    }

    // Returns the stored mTotalPrevWordsLanguageCost value.
    float getTotalPrevWordsLanguageCost() const {
        return mTotalPrevWordsLanguageCost;
    }
@@ -135,6 +157,7 @@ class DicNodeStateScoring {
    // Use a default copy constructor and an assign operator because shallow copies are ok
    // for this class
    DoubleLetterLevel mDoubleLetterLevel;
    DigraphUtils::DigraphCodePointIndex mDigraphIndex;

    int16_t mEditCorrectionCount;
    int16_t mProximityCorrectionCount;