Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 2fdf5b70 authored by Tom Ouyang
Browse files

Add DigraphUtils class

Add a new DigraphUtils class to manage digraph handling for both
gesture and typing input.

Bug: 8493920
Change-Id: I3a509e1311b9039653f9f488d3c28bb54205f416
parent 868805ae
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -59,6 +59,7 @@ LATIN_IME_CORE_SRC_FILES := \
    correction.cpp \
    dictionary.cpp \
    dic_traverse_wrapper.cpp \
    digraph_utils.cpp \
    proximity_info.cpp \
    proximity_info_params.cpp \
    proximity_info_state.cpp \
+93 −0
Original line number Diff line number Diff line
/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "binary_format.h"
#include "defines.h"
#include "digraph_utils.h"

namespace latinime {

// Digraph tables. Every entry is laid out as { first, second, compositeGlyph }: the two code
// points that make up the digraph, followed by the single composite glyph they stand for.

// Entries returned for dictionaries flagged with REQUIRES_GERMAN_UMLAUT_PROCESSING.
const DigraphUtils::digraph_t DigraphUtils::GERMAN_UMLAUT_DIGRAPHS[] = {
    { 'a', 'e', 0x00E4 },  // U+00E4 : LATIN SMALL LETTER A WITH DIAERESIS
    { 'o', 'e', 0x00F6 },  // U+00F6 : LATIN SMALL LETTER O WITH DIAERESIS
    { 'u', 'e', 0x00FC }   // U+00FC : LATIN SMALL LETTER U WITH DIAERESIS
};

// Entries returned for dictionaries flagged with REQUIRES_FRENCH_LIGATURES_PROCESSING.
const DigraphUtils::digraph_t DigraphUtils::FRENCH_LIGATURES_DIGRAPHS[] = {
    { 'a', 'e', 0x00E6 },  // U+00E6 : LATIN SMALL LETTER AE
    { 'o', 'e', 0x0153 }   // U+0153 : LATIN SMALL LIGATURE OE
};

/* static */ bool DigraphUtils::hasDigraphForCodePoint(
        const int dictFlags, const int compositeGlyphCodePoint) {
    if (DigraphUtils::getDigraphForCodePoint(dictFlags, compositeGlyphCodePoint)) {
        return true;
    }
    return false;
}

// Looks up the digraph table that applies to a dictionary with the given flags.
// dictFlags: the dictionary flags; tested against the BinaryFormat REQUIRES_* processing bits.
// digraphs: out parameter. Pointed at the matching table when one applies; left untouched when
//         no processing flag is set, so callers should initialize it before the call.
// Returns the number of entries in the selected table, or 0 if none applies.
// The German umlaut flag is tested first, so it wins when both flags are set.
/* static */ int DigraphUtils::getAllDigraphsForDictionaryAndReturnSize(
        const int dictFlags, const DigraphUtils::digraph_t **digraphs) {
    const bool wantsGermanUmlauts =
            (dictFlags & BinaryFormat::REQUIRES_GERMAN_UMLAUT_PROCESSING) != 0;
    const bool wantsFrenchLigatures =
            (dictFlags & BinaryFormat::REQUIRES_FRENCH_LIGATURES_PROCESSING) != 0;

    int tableSize = 0;
    if (wantsGermanUmlauts) {
        *digraphs = DigraphUtils::GERMAN_UMLAUT_DIGRAPHS;
        tableSize = NELEMS(DigraphUtils::GERMAN_UMLAUT_DIGRAPHS);
    } else if (wantsFrenchLigatures) {
        *digraphs = DigraphUtils::FRENCH_LIGATURES_DIGRAPHS;
        tableSize = NELEMS(DigraphUtils::FRENCH_LIGATURES_DIGRAPHS);
    }
    return tableSize;
}

// Returns the digraph codepoint for the given composite glyph codepoint and digraph codepoint index
// (which specifies the first or second codepoint in the digraph).
/* static */ int DigraphUtils::getDigraphCodePointForIndex(const int dictFlags,
        const int compositeGlyphCodePoint, const DigraphCodePointIndex digraphCodePointIndex) {
    if (digraphCodePointIndex == NOT_A_DIGRAPH_INDEX) {
        return NOT_A_CODE_POINT;
    }
    const DigraphUtils::digraph_t *digraph =
            DigraphUtils::getDigraphForCodePoint(dictFlags, compositeGlyphCodePoint);
    if (!digraph) {
        return NOT_A_CODE_POINT;
    }
    if (digraphCodePointIndex == FIRST_DIGRAPH_CODEPOINT) {
        return digraph->first;
    } else if (digraphCodePointIndex == SECOND_DIGRAPH_CODEPOINT) {
        return digraph->second;
    }
    ASSERT(false);
    return NOT_A_CODE_POINT;
}

/**
 * Finds the digraph entry whose composite glyph equals the given code point.
 * dictFlags: the dictionary flags used to select the digraph table to search.
 * compositeGlyphCodePoint: the composite glyph code point to look for.
 * Returns a pointer to the matching entry in the selected table, or 0 if the dictionary has no
 * digraph table or the table holds no entry for this code point.
 */
/* static */ const DigraphUtils::digraph_t *DigraphUtils::getDigraphForCodePoint(
        const int dictFlags, const int compositeGlyphCodePoint) {
    // Must start as 0: the lookup below leaves the pointer untouched when no table applies.
    const DigraphUtils::digraph_t *candidate = 0;
    const int entryCount =
            DigraphUtils::getAllDigraphsForDictionaryAndReturnSize(dictFlags, &candidate);
    // Walk the table front to back; the first match wins.
    for (int remaining = entryCount; remaining > 0; --remaining, ++candidate) {
        if (compositeGlyphCodePoint == candidate->compositeGlyph) {
            return candidate;
        }
    }
    return 0;
}

} // namespace latinime
+47 −0
Original line number Diff line number Diff line
/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef DIGRAPH_UTILS_H
#define DIGRAPH_UTILS_H

namespace latinime {

// Static-only helper that maps composite glyph code points to the two code points of the
// digraph that spells them, using the digraph table selected by a dictionary's flags.
class DigraphUtils {
 public:
    // Identifies which code point of a digraph is being referred to.
    enum DigraphCodePointIndex {
        NOT_A_DIGRAPH_INDEX,       // No digraph code point is selected.
        FIRST_DIGRAPH_CODEPOINT,   // Selects digraph_t::first.
        SECOND_DIGRAPH_CODEPOINT   // Selects digraph_t::second.
    };

    // One digraph table entry: the two code points of the digraph and the composite glyph
    // they correspond to.
    struct digraph_t {
        int first;
        int second;
        int compositeGlyph;
    };

    // Returns true if a digraph exists for compositeGlyphCodePoint under dictFlags.
    static bool hasDigraphForCodePoint(const int dictFlags, const int compositeGlyphCodePoint);

    // Points *digraphs at the digraph table selected by dictFlags and returns its size,
    // or returns 0 if no table applies.
    static int getAllDigraphsForDictionaryAndReturnSize(
            const int dictFlags, const digraph_t **digraphs);

    // Returns the first or second code point (as chosen by digraphCodePointIndex) of the
    // digraph for compositeGlyphCodePoint.
    static int getDigraphCodePointForIndex(const int dictFlags, const int compositeGlyphCodePoint,
            const DigraphCodePointIndex digraphCodePointIndex);

 private:
    // NOTE(review): this macro is not defined in this header and nothing is included here;
    // the visible includers pull in defines.h first, presumably its source -- confirm, and
    // consider including it directly so the header is self-contained.
    DISALLOW_IMPLICIT_CONSTRUCTORS(DigraphUtils);

    // Returns the table entry for compositeGlyphCodePoint, or 0 if there is none.
    static const digraph_t *getDigraphForCodePoint(
            const int dictFlags, const int compositeGlyphCodePoint);

    // Digraph tables, defined in digraph_utils.cpp.
    static const digraph_t GERMAN_UMLAUT_DIGRAPHS[];
    static const digraph_t FRENCH_LIGATURES_DIGRAPHS[];
};
} // namespace latinime
#endif // DIGRAPH_UTILS_H
+9 −22
Original line number Diff line number Diff line
@@ -22,6 +22,7 @@
#include "char_utils.h"
#include "defines.h"
#include "dictionary.h"
#include "digraph_utils.h"
#include "proximity_info.h"
#include "terminal_attributes.h"
#include "unigram_dictionary.h"
@@ -30,15 +31,6 @@

namespace latinime {

const UnigramDictionary::digraph_t UnigramDictionary::GERMAN_UMLAUT_DIGRAPHS[] =
        { { 'a', 'e', 0x00E4 }, // U+00E4 : LATIN SMALL LETTER A WITH DIAERESIS
        { 'o', 'e', 0x00F6 }, // U+00F6 : LATIN SMALL LETTER O WITH DIAERESIS
        { 'u', 'e', 0x00FC } }; // U+00FC : LATIN SMALL LETTER U WITH DIAERESIS

const UnigramDictionary::digraph_t UnigramDictionary::FRENCH_LIGATURES_DIGRAPHS[] =
        { { 'a', 'e', 0x00E6 }, // U+00E6 : LATIN SMALL LETTER AE
        { 'o', 'e', 0x0153 } }; // U+0153 : LATIN SMALL LIGATURE OE

// TODO: check the header
UnigramDictionary::UnigramDictionary(const uint8_t *const streamStart, const unsigned int flags)
        : DICT_ROOT(streamStart), ROOT_POS(0),
@@ -58,7 +50,7 @@ static void addWord(int *word, int length, int probability, WordsPriorityQueue *

// Return the replacement code point for a digraph, or 0 if none.
int UnigramDictionary::getDigraphReplacement(const int *codes, const int i, const int inputSize,
        const digraph_t *const digraphs, const unsigned int digraphsSize) const {
        const DigraphUtils::digraph_t *const digraphs, const unsigned int digraphsSize) const {

    // There can't be a digraph if we don't have at least 2 characters to examine
    if (i + 2 > inputSize) return false;
@@ -74,7 +66,7 @@ int UnigramDictionary::getDigraphReplacement(const int *codes, const int i, cons

    // It's an interesting digraph if the second char matches too.
    if (digraphs[lastDigraphIndex].second == codes[i + 1]) {
        return digraphs[lastDigraphIndex].replacement;
        return digraphs[lastDigraphIndex].compositeGlyph;
    } else {
        return 0;
    }
@@ -93,7 +85,7 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit
        const bool useFullEditDistance, const int *codesSrc,
        const int codesRemain, const int currentDepth, int *codesDest, Correction *correction,
        WordsPriorityQueuePool *queuePool,
        const digraph_t *const digraphs, const unsigned int digraphsSize) const {
        const DigraphUtils::digraph_t *const digraphs, const unsigned int digraphsSize) const {
    ASSERT(sizeof(codesDest[0]) == sizeof(codesSrc[0]));
    ASSERT(sizeof(xCoordinatesBuffer[0]) == sizeof(xcoordinates[0]));
    ASSERT(sizeof(yCoordinatesBuffer[0]) == sizeof(ycoordinates[0]));
@@ -169,7 +161,10 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *x
    queuePool.clearAll();
    Correction masterCorrection;
    masterCorrection.resetCorrection();
    if (BinaryFormat::REQUIRES_GERMAN_UMLAUT_PROCESSING & FLAGS)
    const DigraphUtils::digraph_t *digraphs = 0;
    const int digraphsSize =
            DigraphUtils::getAllDigraphsForDictionaryAndReturnSize(FLAGS, &digraphs);
    if (digraphsSize > 0)
    { // Incrementally tune the word and try all possibilities
        int codesBuffer[sizeof(*inputCodePoints) * inputSize];
        int xCoordinatesBuffer[inputSize];
@@ -177,15 +172,7 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *x
        getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer,
                xCoordinatesBuffer, yCoordinatesBuffer, inputSize, bigramMap, bigramFilter,
                useFullEditDistance, inputCodePoints, inputSize, 0, codesBuffer, &masterCorrection,
                &queuePool, GERMAN_UMLAUT_DIGRAPHS, NELEMS(GERMAN_UMLAUT_DIGRAPHS));
    } else if (BinaryFormat::REQUIRES_FRENCH_LIGATURES_PROCESSING & FLAGS) {
        int codesBuffer[sizeof(*inputCodePoints) * inputSize];
        int xCoordinatesBuffer[inputSize];
        int yCoordinatesBuffer[inputSize];
        getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer,
                xCoordinatesBuffer, yCoordinatesBuffer, inputSize, bigramMap, bigramFilter,
                useFullEditDistance, inputCodePoints, inputSize, 0, codesBuffer, &masterCorrection,
                &queuePool, FRENCH_LIGATURES_DIGRAPHS, NELEMS(FRENCH_LIGATURES_DIGRAPHS));
                &queuePool, digraphs, digraphsSize);
    } else { // Normal processing
        getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, inputCodePoints, inputSize,
                bigramMap, bigramFilter, useFullEditDistance, &masterCorrection, &queuePool);
+3 −7
Original line number Diff line number Diff line
@@ -20,6 +20,7 @@
#include <map>
#include <stdint.h>
#include "defines.h"
#include "digraph_utils.h"

namespace latinime {

@@ -29,8 +30,6 @@ class TerminalAttributes;
class WordsPriorityQueuePool;

class UnigramDictionary {
    typedef struct { int first; int second; int replacement; } digraph_t;

 public:
    // Error tolerances
    static const int DEFAULT_MAX_ERRORS = 2;
@@ -57,13 +56,13 @@ class UnigramDictionary {
            const bool useFullEditDistance, Correction *correction,
            WordsPriorityQueuePool *queuePool) const;
    int getDigraphReplacement(const int *codes, const int i, const int inputSize,
            const digraph_t *const digraphs, const unsigned int digraphsSize) const;
            const DigraphUtils::digraph_t *const digraphs, const unsigned int digraphsSize) const;
    void getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo, const int *xcoordinates,
            const int *ycoordinates, const int *codesBuffer, int *xCoordinatesBuffer,
            int *yCoordinatesBuffer, const int codesBufferSize, const std::map<int, int> *bigramMap,
            const uint8_t *bigramFilter, const bool useFullEditDistance, const int *codesSrc,
            const int codesRemain, const int currentDepth, int *codesDest, Correction *correction,
            WordsPriorityQueuePool *queuePool, const digraph_t *const digraphs,
            WordsPriorityQueuePool *queuePool, const DigraphUtils::digraph_t *const digraphs,
            const unsigned int digraphsSize) const;
    void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
            const int *ycoordinates, const int *codes, const int inputSize,
@@ -111,9 +110,6 @@ class UnigramDictionary {
    const int ROOT_POS;
    const int MAX_DIGRAPH_SEARCH_DEPTH;
    const int FLAGS;

    static const digraph_t GERMAN_UMLAUT_DIGRAPHS[];
    static const digraph_t FRENCH_LIGATURES_DIGRAPHS[];
};
} // namespace latinime
#endif // LATINIME_UNIGRAM_DICTIONARY_H