Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 9c4396ab authored by Jean Chalard's avatar Jean Chalard Committed by Android (Google) Code Review
Browse files

Merge "Generalize the digraph code"

parents 2e66afcc 6c30061c
Loading
Loading
Loading
Loading
+3 −3
Original line number Diff line number Diff line
@@ -232,9 +232,9 @@ static inline void prof_out(void) {

#define FIRST_WORD_INDEX 0

// TODO: Reduce this constant if possible; check the maximum number of umlauts in the same German
// word in the dictionary
#define DEFAULT_MAX_UMLAUT_SEARCH_DEPTH 5
// TODO: Reduce this constant if possible; check the maximum number of digraphs in the same
// word in the dictionary for languages with digraphs, like German and French
#define DEFAULT_MAX_DIGRAPH_SEARCH_DEPTH 5

// Minimum suggest depth for one word for all cases except for missing space suggestions.
#define MIN_SUGGEST_DEPTH 1
+16 −12
Original line number Diff line number Diff line
@@ -44,7 +44,7 @@ UnigramDictionary::UnigramDictionary(const uint8_t* const streamStart, int typed
      // TODO: remove this variable.
    ROOT_POS(0),
    BYTES_IN_ONE_CHAR(MAX_PROXIMITY_CHARS * sizeof(int)),
    MAX_UMLAUT_SEARCH_DEPTH(DEFAULT_MAX_UMLAUT_SEARCH_DEPTH) {
    MAX_DIGRAPH_SEARCH_DEPTH(DEFAULT_MAX_DIGRAPH_SEARCH_DEPTH) {
    if (DEBUG_DICT) {
        AKLOGI("UnigramDictionary - constructor");
    }
@@ -64,7 +64,8 @@ static inline void addWord(
    queue->push(frequency, word, length);
}

bool UnigramDictionary::isDigraph(const int *codes, const int i, const int codesSize) const {
bool UnigramDictionary::isDigraph(const int *codes, const int i, const int codesSize,
        const digraph_t* const digraphs, const unsigned int digraphsSize) const {

    // There can't be a digraph if we don't have at least 2 characters to examine
    if (i + 2 > codesSize) return false;
@@ -72,15 +73,14 @@ bool UnigramDictionary::isDigraph(const int *codes, const int i, const int codes
    // Search for the first char of some digraph
    int lastDigraphIndex = -1;
    const int thisChar = codes[i * MAX_PROXIMITY_CHARS];
    for (lastDigraphIndex = sizeof(GERMAN_UMLAUT_DIGRAPHS) / sizeof(GERMAN_UMLAUT_DIGRAPHS[0]) - 1;
            lastDigraphIndex >= 0; --lastDigraphIndex) {
        if (thisChar == GERMAN_UMLAUT_DIGRAPHS[lastDigraphIndex].first) break;
    for (lastDigraphIndex = digraphsSize - 1; lastDigraphIndex >= 0; --lastDigraphIndex) {
        if (thisChar == digraphs[lastDigraphIndex].first) break;
    }
    // No match: return early
    if (lastDigraphIndex < 0) return false;

    // It's an interesting digraph if the second char matches too.
    return GERMAN_UMLAUT_DIGRAPHS[lastDigraphIndex].second == codes[(i + 1) * MAX_PROXIMITY_CHARS];
    return digraphs[lastDigraphIndex].second == codes[(i + 1) * MAX_PROXIMITY_CHARS];
}

// Mostly the same arguments as the non-recursive version, except:
@@ -94,14 +94,15 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit
        int *xCoordinatesBuffer, int *yCoordinatesBuffer,
        const int codesBufferSize, const int flags, const int *codesSrc,
        const int codesRemain, const int currentDepth, int *codesDest, Correction *correction,
        WordsPriorityQueuePool *queuePool) {
        WordsPriorityQueuePool *queuePool,
        const digraph_t* const digraphs, const unsigned int digraphsSize) {

    const int startIndex = (codesDest - codesBuffer) / MAX_PROXIMITY_CHARS;
    if (currentDepth < MAX_UMLAUT_SEARCH_DEPTH) {
    if (currentDepth < MAX_DIGRAPH_SEARCH_DEPTH) {
        for (int i = 0; i < codesRemain; ++i) {
            xCoordinatesBuffer[startIndex + i] = xcoordinates[codesBufferSize - codesRemain + i];
            yCoordinatesBuffer[startIndex + i] = ycoordinates[codesBufferSize - codesRemain + i];
            if (isDigraph(codesSrc, i, codesRemain)) {
            if (isDigraph(codesSrc, i, codesRemain, digraphs, digraphsSize)) {
                // Found a digraph. We will try both spellings. e.g. the word is "pruefen"

                // Copy the word up to the first char of the digraph, then continue processing
@@ -115,7 +116,7 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit
                        codesBuffer, xCoordinatesBuffer, yCoordinatesBuffer, codesBufferSize, flags,
                        codesSrc + (i + 1) * MAX_PROXIMITY_CHARS, codesRemain - i - 1,
                        currentDepth + 1, codesDest + i * MAX_PROXIMITY_CHARS, correction,
                        queuePool);
                        queuePool, digraphs, digraphsSize);

                // Copy the second char of the digraph in place, then continue processing on
                // the remaining part of the word.
@@ -125,7 +126,8 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximit
                getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates,
                        codesBuffer, xCoordinatesBuffer, yCoordinatesBuffer, codesBufferSize, flags,
                        codesSrc + i * MAX_PROXIMITY_CHARS, codesRemain - i, currentDepth + 1,
                        codesDest + i * MAX_PROXIMITY_CHARS, correction, queuePool);
                        codesDest + i * MAX_PROXIMITY_CHARS, correction, queuePool,
                        digraphs, digraphsSize);
                return;
            }
        }
@@ -164,7 +166,9 @@ int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo,
        int yCoordinatesBuffer[codesSize];
        getWordWithDigraphSuggestionsRec(proximityInfo, xcoordinates, ycoordinates, codesBuffer,
                xCoordinatesBuffer, yCoordinatesBuffer,
                codesSize, flags, codes, codesSize, 0, codesBuffer, masterCorrection, queuePool);
                codesSize, flags, codes, codesSize, 0, codesBuffer, masterCorrection, queuePool,
                GERMAN_UMLAUT_DIGRAPHS,
                sizeof(GERMAN_UMLAUT_DIGRAPHS) / sizeof(GERMAN_UMLAUT_DIGRAPHS[0]));
    } else { // Normal processing
        getWordSuggestions(proximityInfo, xcoordinates, ycoordinates, codes, codesSize, flags,
                masterCorrection, queuePool);
+8 −4
Original line number Diff line number Diff line
@@ -29,6 +29,8 @@ namespace latinime {

class TerminalAttributes;
class UnigramDictionary {
    typedef struct { int first; int second; } digraph_t;

 public:
    // Mask and flags for children address type selection.
    static const int MASK_GROUP_ADDRESS_TYPE = 0xC0;
@@ -86,13 +88,15 @@ class UnigramDictionary {
    void getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
            const int *ycoordinates, const int *codes, const int inputLength,
            const int flags, Correction *correction, WordsPriorityQueuePool *queuePool);
    bool isDigraph(const int *codes, const int i, const int codesSize) const;
    bool isDigraph(const int *codes, const int i, const int codesSize,
            const digraph_t* const digraphs, const unsigned int digraphsSize) const;
    void getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo,
        const int *xcoordinates, const int* ycoordinates, const int *codesBuffer,
        int *xCoordinatesBuffer, int *yCoordinatesBuffer,
        const int codesBufferSize, const int flags, const int* codesSrc,
        const int codesRemain, const int currentDepth, int* codesDest, Correction *correction,
        WordsPriorityQueuePool* queuePool);
        WordsPriorityQueuePool* queuePool, const digraph_t* const digraphs,
        const unsigned int digraphsSize);
    void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
            const int *ycoordinates, const int *codes, const int codesSize, Correction *correction);
    void getOneWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates,
@@ -145,7 +149,7 @@ class UnigramDictionary {
    const int FULL_WORD_MULTIPLIER;
    const int ROOT_POS;
    const unsigned int BYTES_IN_ONE_CHAR;
    const int MAX_UMLAUT_SEARCH_DEPTH;
    const int MAX_DIGRAPH_SEARCH_DEPTH;

    // Flags for special processing
    // Those *must* match the flags in BinaryDictionary.Flags.ALL_FLAGS in BinaryDictionary.java
@@ -155,7 +159,7 @@ class UnigramDictionary {
        REQUIRES_GERMAN_UMLAUT_PROCESSING = 0x1,
        USE_FULL_EDIT_DISTANCE = 0x2
    };
    static const struct digraph_t { int first; int second; } GERMAN_UMLAUT_DIGRAPHS[];
    static const digraph_t GERMAN_UMLAUT_DIGRAPHS[];

    // Still bundled members
    unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];// TODO: remove