Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 08fd3d95 authored by Tom Ouyang's avatar Tom Ouyang Committed by Android (Google) Code Review
Browse files

Merge "Add DigraphUtils class"

parents 65534ffc 2fdf5b70
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -58,6 +58,7 @@ LATIN_IME_CORE_SRC_FILES := \
    correction.cpp \
    dictionary.cpp \
    dic_traverse_wrapper.cpp \
    digraph_utils.cpp \
    proximity_info.cpp \
    proximity_info_params.cpp \
    proximity_info_state.cpp \
+93 −0
Original line number Diff line number Diff line
/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "binary_format.h"
#include "defines.h"
#include "digraph_utils.h"

namespace latinime {

// Digraph tables. Each entry is { first, second, compositeGlyph }: the two code points of the
// digraph followed by the single composite code point associated with them.
// GERMAN_UMLAUT_DIGRAPHS is the table handed out for dictionaries whose flags have
// BinaryFormat::REQUIRES_GERMAN_UMLAUT_PROCESSING set; FRENCH_LIGATURES_DIGRAPHS is the one for
// BinaryFormat::REQUIRES_FRENCH_LIGATURES_PROCESSING.
const DigraphUtils::digraph_t DigraphUtils::GERMAN_UMLAUT_DIGRAPHS[] =
        { { 'a', 'e', 0x00E4 }, // U+00E4 : LATIN SMALL LETTER A WITH DIAERESIS
        { 'o', 'e', 0x00F6 },   // U+00F6 : LATIN SMALL LETTER O WITH DIAERESIS
        { 'u', 'e', 0x00FC } }; // U+00FC : LATIN SMALL LETTER U WITH DIAERESIS
const DigraphUtils::digraph_t DigraphUtils::FRENCH_LIGATURES_DIGRAPHS[] =
        { { 'a', 'e', 0x00E6 }, // U+00E6 : LATIN SMALL LETTER AE
        { 'o', 'e', 0x0153 } }; // U+0153 : LATIN SMALL LIGATURE OE

/* static */ bool DigraphUtils::hasDigraphForCodePoint(
        const int dictFlags, const int compositeGlyphCodePoint) {
    if (DigraphUtils::getDigraphForCodePoint(dictFlags, compositeGlyphCodePoint)) {
        return true;
    }
    return false;
}

// Looks up the digraph table that applies to a dictionary.
// dictFlags: the dictionary flags, tested against the BinaryFormat digraph-processing bits.
// digraphs: out-parameter; pointed at the matching table when one applies and left untouched
//         otherwise.
// Returns the number of entries in the selected table, or 0 if neither bit is set. The German
// umlaut bit is tested first, so it takes precedence when both bits are present.
/* static */ int DigraphUtils::getAllDigraphsForDictionaryAndReturnSize(
        const int dictFlags, const DigraphUtils::digraph_t **digraphs) {
    int tableSize = 0;
    if ((BinaryFormat::REQUIRES_GERMAN_UMLAUT_PROCESSING & dictFlags) != 0) {
        *digraphs = GERMAN_UMLAUT_DIGRAPHS;
        tableSize = NELEMS(GERMAN_UMLAUT_DIGRAPHS);
    } else if ((BinaryFormat::REQUIRES_FRENCH_LIGATURES_PROCESSING & dictFlags) != 0) {
        *digraphs = FRENCH_LIGATURES_DIGRAPHS;
        tableSize = NELEMS(FRENCH_LIGATURES_DIGRAPHS);
    }
    return tableSize;
}

// Returns the digraph codepoint for the given composite glyph codepoint and digraph codepoint index
// (which specifies the first or second codepoint in the digraph).
/* static */ int DigraphUtils::getDigraphCodePointForIndex(const int dictFlags,
        const int compositeGlyphCodePoint, const DigraphCodePointIndex digraphCodePointIndex) {
    if (digraphCodePointIndex == NOT_A_DIGRAPH_INDEX) {
        return NOT_A_CODE_POINT;
    }
    const DigraphUtils::digraph_t *digraph =
            DigraphUtils::getDigraphForCodePoint(dictFlags, compositeGlyphCodePoint);
    if (!digraph) {
        return NOT_A_CODE_POINT;
    }
    if (digraphCodePointIndex == FIRST_DIGRAPH_CODEPOINT) {
        return digraph->first;
    } else if (digraphCodePointIndex == SECOND_DIGRAPH_CODEPOINT) {
        return digraph->second;
    }
    ASSERT(false);
    return NOT_A_CODE_POINT;
}

/**
 * Finds the digraph entry whose composite glyph equals the given codepoint.
 * dictFlags: the dictionary flags that select which digraph table (if any) is searched.
 * compositeGlyphCodePoint: the composite glyph codepoint to look for.
 * Returns a pointer to the first matching table entry, or 0 if the dictionary has no digraph
 * table or the table holds no entry for this codepoint.
 */
/* static */ const DigraphUtils::digraph_t *DigraphUtils::getDigraphForCodePoint(
        const int dictFlags, const int compositeGlyphCodePoint) {
    const DigraphUtils::digraph_t *table = 0;
    const int tableSize = getAllDigraphsForDictionaryAndReturnSize(dictFlags, &table);
    // Advance until a matching entry is found or the table is exhausted.
    int index = 0;
    while (index < tableSize && table[index].compositeGlyph != compositeGlyphCodePoint) {
        ++index;
    }
    return (index < tableSize) ? &table[index] : 0;
}

} // namespace latinime
+47 −0
Original line number Diff line number Diff line
/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef DIGRAPH_UTILS_H
#define DIGRAPH_UTILS_H

namespace latinime {

// Static-only helper that exposes the digraph tables (two-codepoint sequences paired with a
// composite glyph codepoint) and lookups over them.
// NOTE(review): this header includes nothing itself, so DISALLOW_IMPLICIT_CONSTRUCTORS has to be
// defined by whoever includes it (presumably via defines.h, which the visible includers pull in
// first -- confirm).
class DigraphUtils {
 public:
    // Selects which codepoint of a digraph a caller is asking for.
    enum DigraphCodePointIndex {
        NOT_A_DIGRAPH_INDEX,
        FIRST_DIGRAPH_CODEPOINT,
        SECOND_DIGRAPH_CODEPOINT
    };

    // One digraph: its two codepoints and the composite glyph codepoint associated with them.
    struct digraph_t {
        int first;
        int second;
        int compositeGlyph;
    };

    // True if a digraph exists for compositeGlyphCodePoint under the given dictionary flags.
    static bool hasDigraphForCodePoint(const int dictFlags, const int compositeGlyphCodePoint);

    // Points *digraphs at the table selected by dictFlags and returns its entry count (0 if
    // no table applies).
    static int getAllDigraphsForDictionaryAndReturnSize(
            const int dictFlags, const digraph_t **digraphs);

    // Returns the first or second codepoint (per digraphCodePointIndex) of the digraph for
    // compositeGlyphCodePoint.
    static int getDigraphCodePointForIndex(const int dictFlags, const int compositeGlyphCodePoint,
            const DigraphCodePointIndex digraphCodePointIndex);

 private:
    DISALLOW_IMPLICIT_CONSTRUCTORS(DigraphUtils);

    // Returns the table entry whose compositeGlyph matches, or 0 if there is none.
    static const digraph_t *getDigraphForCodePoint(
            const int dictFlags, const int compositeGlyphCodePoint);

    static const digraph_t GERMAN_UMLAUT_DIGRAPHS[];
    static const digraph_t FRENCH_LIGATURES_DIGRAPHS[];
};
} // namespace latinime
#endif // DIGRAPH_UTILS_H
+9 −22
Original line number Diff line number Diff line
@@ -22,6 +22,7 @@
#include "char_utils.h"
#include "defines.h"
#include "dictionary.h"
#include "digraph_utils.h"
#include "proximity_info.h"
#include "terminal_attributes.h"
#include "unigram_dictionary.h"
@@ -30,15 +31,6 @@

namespace latinime {

const UnigramDictionary::digraph_t UnigramDictionary::GERMAN_UMLAUT_DIGRAPHS[] =
        { { 'a', 'e', 0x00E4 }, // U+00E4 : LATIN SMALL LETTER A WITH DIAERESIS
        { 'o', 'e', 0x00F6 }, // U+00F6 : LATIN SMALL LETTER O WITH DIAERESIS
        { 'u', 'e', 0x00FC } }; // U+00FC : LATIN SMALL LETTER U WITH DIAERESIS

const UnigramDictionary::digraph_t UnigramDictionary::FRENCH_LIGATURES_DIGRAPHS[] =
        { { 'a', 'e', 0x00E6 }, // U+00E6 : LATIN SMALL LETTER AE
        { 'o', 'e', 0x0153 } }; // U+0153 : LATIN SMALL LIGATURE OE

// TODO: check the header
UnigramDictionary::UnigramDictionary(const uint8_t *const streamStart, const unsigned int flags)
        : DICT_ROOT(streamStart), ROOT_POS(0),
@@ -58,7 +50,7 @@ static void addWord(int *word, int length, int probability, WordsPriorityQueue *

// Return the replacement code point for a digraph, or 0 if none.
int UnigramDictionary::getDigraphReplacement(const int *codes, const int i, const int inputSize,
        const digraph_t *const digraphs, const unsigned int digraphsSize) const {
        const DigraphUtils::digraph_t *const digraphs, const unsigned int digraphsSize) const {

    // There can't be a digraph if we don't have at least 2 characters to examine
    if (i + 2 > inputSize) return false;
@@ -74,7 +66,7 @@ int UnigramDictionary::getDigraphReplacement(const int *codes, const int i, cons

    // It's an interesting digraph if the second char matches too.
    if (digraphs[lastDigraphIndex].second == codes[i + 1]) {
        return digraphs[lastDigraphIndex].replacement;
        return digraphs[lastDigraphIndex].compositeGlyph;
    } else {
        return 0;
    }
@@ -93,7 +85,7 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit
        const bool useFullEditDistance, const int *codesSrc,
        const int codesRemain, const int currentDepth, int *codesDest, Correction *correction,
        WordsPriorityQueuePool *queuePool,
        const digraph_t *const digraphs, const unsigned int digraphsSize) const {
        const DigraphUtils::digraph_t *const digraphs, const unsigned int digraphsSize) const {
    ASSERT(sizeof(codesDest[0]) == sizeof(codesSrc[0]));
    ASSERT(sizeof(xCoordinatesBuffer[0]) == sizeof(xcoordinates[0]));
    ASSERT(sizeof(yCoordinatesBuffer[0]) == sizeof(ycoordinates[0]));
@@ -169,7 +161,10 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *x
    queuePool.clearAll();
    Correction masterCorrection;
    masterCorrection.resetCorrection();
    if (BinaryFormat::REQUIRES_GERMAN_UMLAUT_PROCESSING & FLAGS)
    const DigraphUtils::digraph_t *digraphs = 0;
    const int digraphsSize =
            DigraphUtils::getAllDigraphsForDictionaryAndReturnSize(FLAGS, &digraphs);
    if (digraphsSize > 0)
    { // Incrementally tune the word and try all possibilities
        int codesBuffer[sizeof(*inputCodePoints) * inputSize];
        int xCoordinatesBuffer[inputSize];
@@ -177,15 +172,7 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *x
        getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer,
                xCoordinatesBuffer, yCoordinatesBuffer, inputSize, bigramMap, bigramFilter,
                useFullEditDistance, inputCodePoints, inputSize, 0, codesBuffer, &masterCorrection,
                &queuePool, GERMAN_UMLAUT_DIGRAPHS, NELEMS(GERMAN_UMLAUT_DIGRAPHS));
    } else if (BinaryFormat::REQUIRES_FRENCH_LIGATURES_PROCESSING & FLAGS) {
        int codesBuffer[sizeof(*inputCodePoints) * inputSize];
        int xCoordinatesBuffer[inputSize];
        int yCoordinatesBuffer[inputSize];
        getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer,
                xCoordinatesBuffer, yCoordinatesBuffer, inputSize, bigramMap, bigramFilter,
                useFullEditDistance, inputCodePoints, inputSize, 0, codesBuffer, &masterCorrection,
                &queuePool, FRENCH_LIGATURES_DIGRAPHS, NELEMS(FRENCH_LIGATURES_DIGRAPHS));
                &queuePool, digraphs, digraphsSize);
    } else { // Normal processing
        getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, inputCodePoints, inputSize,
                bigramMap, bigramFilter, useFullEditDistance, &masterCorrection, &queuePool);
+3 −7
Original line number Diff line number Diff line
@@ -20,6 +20,7 @@
#include <map>
#include <stdint.h>
#include "defines.h"
#include "digraph_utils.h"

namespace latinime {

@@ -29,8 +30,6 @@ class TerminalAttributes;
class WordsPriorityQueuePool;

class UnigramDictionary {
    typedef struct { int first; int second; int replacement; } digraph_t;

 public:
    // Error tolerances
    static const int DEFAULT_MAX_ERRORS = 2;
@@ -57,13 +56,13 @@ class UnigramDictionary {
            const bool useFullEditDistance, Correction *correction,
            WordsPriorityQueuePool *queuePool) const;
    int getDigraphReplacement(const int *codes, const int i, const int inputSize,
            const digraph_t *const digraphs, const unsigned int digraphsSize) const;
            const DigraphUtils::digraph_t *const digraphs, const unsigned int digraphsSize) const;
    void getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo, const int *xcoordinates,
            const int *ycoordinates, const int *codesBuffer, int *xCoordinatesBuffer,
            int *yCoordinatesBuffer, const int codesBufferSize, const std::map<int, int> *bigramMap,
            const uint8_t *bigramFilter, const bool useFullEditDistance, const int *codesSrc,
            const int codesRemain, const int currentDepth, int *codesDest, Correction *correction,
            WordsPriorityQueuePool *queuePool, const digraph_t *const digraphs,
            WordsPriorityQueuePool *queuePool, const DigraphUtils::digraph_t *const digraphs,
            const unsigned int digraphsSize) const;
    void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
            const int *ycoordinates, const int *codes, const int inputSize,
@@ -111,9 +110,6 @@ class UnigramDictionary {
    const int ROOT_POS;
    const int MAX_DIGRAPH_SEARCH_DEPTH;
    const int FLAGS;

    static const digraph_t GERMAN_UMLAUT_DIGRAPHS[];
    static const digraph_t FRENCH_LIGATURES_DIGRAPHS[];
};
} // namespace latinime
#endif // LATINIME_UNIGRAM_DICTIONARY_H