Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit fb64d0cd authored by Jean Chalard's avatar Jean Chalard Committed by Android (Google) Code Review
Browse files

Merge "Read shortcuts as strings in the dictionary."

parents b6313f72 9a933a74
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -123,6 +123,7 @@ int BigramDictionary::getBigrams(unsigned short *prevWord, int prevWordLength, i
    }
    pos = BinaryFormat::skipChildrenPosition(flags, pos);
    pos = BinaryFormat::skipFrequency(flags, pos);
    pos = BinaryFormat::skipShortcuts(root, flags, pos);
    int bigramFlags;
    int bigramCount = 0;
    do {
+31 −9
Original line number Diff line number Diff line
@@ -40,6 +40,9 @@ class BinaryFormat {
    // implementations. On this occasion, we made the magic number 32 bits long.
    const static uint32_t FORMAT_VERSION_2_MAGIC_NUMBER = 0x9BC13AFE;

    const static int CHARACTER_ARRAY_TERMINATOR_SIZE = 1;
    const static int SHORTCUT_LIST_SIZE_SIZE = 2;

    static int detectFormat(const uint8_t* const dict);
    static unsigned int getHeaderSize(const uint8_t* const dict);
    static int getGroupCountAndForwardPointer(const uint8_t* const dict, int* pos);
@@ -47,9 +50,10 @@ class BinaryFormat {
    static int32_t getCharCodeAndForwardPointer(const uint8_t* const dict, int* pos);
    static int readFrequencyWithoutMovingPointer(const uint8_t* const dict, const int pos);
    static int skipOtherCharacters(const uint8_t* const dict, const int pos);
    static int skipAttributes(const uint8_t* const dict, const int pos);
    static int skipChildrenPosition(const uint8_t flags, const int pos);
    static int skipFrequency(const uint8_t flags, const int pos);
    static int skipShortcuts(const uint8_t* const dict, const uint8_t flags, const int pos);
    static int skipBigrams(const uint8_t* const dict, const uint8_t flags, const int pos);
    static int skipAllAttributes(const uint8_t* const dict, const uint8_t flags, const int pos);
    static int skipChildrenPosAndAttributes(const uint8_t* const dict, const uint8_t flags,
            const int pos);
@@ -157,12 +161,12 @@ static inline int attributeAddressSize(const uint8_t flags) {
    */
}

inline int BinaryFormat::skipAttributes(const uint8_t* const dict, const int pos) {
static inline int skipExistingBigrams(const uint8_t* const dict, const int pos) {
    int currentPos = pos;
    uint8_t flags = getFlagsAndForwardPointer(dict, &currentPos);
    uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(dict, &currentPos);
    while (flags & UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT) {
        currentPos += attributeAddressSize(flags);
        flags = getFlagsAndForwardPointer(dict, &currentPos);
        flags = BinaryFormat::getFlagsAndForwardPointer(dict, &currentPos);
    }
    currentPos += attributeAddressSize(flags);
    return currentPos;
@@ -174,6 +178,10 @@ static inline int childrenAddressSize(const uint8_t flags) {
    /* See the note in attributeAddressSize. The same applies here */
}

// Reads the byte size of a shortcut list, stored as a two-byte big-endian value at
// dict[pos] (high 8 bits) and dict[pos + 1] (low 8 bits).
static inline int shortcutByteSize(const uint8_t* const dict, const int pos) {
    const int highByte = dict[pos];
    const int lowByte = dict[pos + 1];
    return (highByte << 8) + lowByte;
}

// Returns the position just past the children address located at pos. The width of that
// address is obtained from the flags through childrenAddressSize().
inline int BinaryFormat::skipChildrenPosition(const uint8_t flags, const int pos) {
    const int addressSize = childrenAddressSize(flags);
    return pos + addressSize;
}
@@ -182,16 +190,30 @@ inline int BinaryFormat::skipFrequency(const uint8_t flags, const int pos) {
    return UnigramDictionary::FLAG_IS_TERMINAL & flags ? pos + 1 : pos;
}

inline int BinaryFormat::skipAllAttributes(const uint8_t* const dict, const uint8_t flags,
inline int BinaryFormat::skipShortcuts(const uint8_t* const dict, const uint8_t flags,
        const int pos) {
    // This function skips all attributes: shortcuts and bigrams.
    int newPos = pos;
    if (UnigramDictionary::FLAG_HAS_SHORTCUT_TARGETS & flags) {
        newPos = skipAttributes(dict, newPos);
        return pos + shortcutByteSize(dict, pos);
    } else {
        return pos;
    }
}

inline int BinaryFormat::skipBigrams(const uint8_t* const dict, const uint8_t flags,
        const int pos) {
    if (UnigramDictionary::FLAG_HAS_BIGRAMS & flags) {
        newPos = skipAttributes(dict, newPos);
        return skipExistingBigrams(dict, pos);
    } else {
        return pos;
    }
}

inline int BinaryFormat::skipAllAttributes(const uint8_t* const dict, const uint8_t flags,
        const int pos) {
    // Skips every attribute in order: first the shortcuts, then the bigrams after them.
    const int afterShortcuts = skipShortcuts(dict, flags, pos);
    return skipBigrams(dict, flags, afterShortcuts);
}

+12 −8
Original line number Diff line number Diff line
@@ -45,13 +45,19 @@ class TerminalAttributes {

        // Gets the shortcut target itself as a uint16_t string. For parameters and return value
        // see BinaryFormat::getWordAtAddress.
        // TODO: make the output a uint32_t* to handle the whole unicode range.
        inline int getNextShortcutTarget(const int maxDepth, uint16_t* outWord) {
            const int shortcutFlags = BinaryFormat::getFlagsAndForwardPointer(mDict, &mPos);
            mHasNextShortcutTarget =
                    0 != (shortcutFlags & UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT);
            int shortcutAddress =
                    BinaryFormat::getAttributeAddressAndForwardPointer(mDict, shortcutFlags, &mPos);
            return BinaryFormat::getWordAtAddress(mDict, shortcutAddress, maxDepth, outWord);
            unsigned int i;
            for (i = 0; i < MAX_WORD_LENGTH_INTERNAL; ++i) {
                const int charCode = BinaryFormat::getCharCodeAndForwardPointer(mDict, &mPos);
                if (NOT_A_CHARACTER == charCode) break;
                outWord[i] = (uint16_t)charCode;
            }
            mPos += BinaryFormat::CHARACTER_ARRAY_TERMINATOR_SIZE;
            return i;
        }
    };

@@ -65,12 +71,10 @@ class TerminalAttributes {
            mDict(dict), mFlags(flags), mStartPos(pos) {
    }

    inline bool isShortcutOnly() const {
        return 0 != (mFlags & UnigramDictionary::FLAG_IS_SHORTCUT_ONLY);
    }

    inline ShortcutIterator getShortcutIterator() const {
        return ShortcutIterator(mDict, mStartPos, mFlags);
        // The size of the shortcuts is stored here so that the whole shortcut chunk can be
        // skipped quickly, so we ignore it.
        return ShortcutIterator(mDict, mStartPos + BinaryFormat::SHORTCUT_LIST_SIZE_SIZE, mFlags);
    }
};
} // namespace latinime
+5 −5
Original line number Diff line number Diff line
@@ -366,10 +366,9 @@ inline void UnigramDictionary::onTerminal(const int freq,
        WordsPriorityQueue *masterQueue = queuePool->getMasterQueue();
        const int finalFreq = correction->getFinalFreq(freq, &wordPointer, &wordLength);
        if (finalFreq != NOT_A_FREQUENCY) {
            if (!terminalAttributes.isShortcutOnly()) {
            addWord(wordPointer, wordLength, finalFreq, masterQueue);
            }

            const int shortcutFreq = finalFreq > 0 ? finalFreq - 1 : 0;
            // Please note that the shortcut candidates will be added to the master queue only.
            TerminalAttributes::ShortcutIterator iterator =
                    terminalAttributes.getShortcutIterator();
@@ -379,11 +378,12 @@ inline void UnigramDictionary::onTerminal(const int freq,
                // We need to either modulate the frequency of each shortcut according
                // to its own shortcut frequency or to make the queue
                // so that the insert order is protected inside the queue for words
                // with the same score.
                // with the same score. For the moment we use -1 to make sure the shortcut will
                // never be in front of the word.
                uint16_t shortcutTarget[MAX_WORD_LENGTH_INTERNAL];
                const int shortcutTargetStringLength = iterator.getNextShortcutTarget(
                        MAX_WORD_LENGTH_INTERNAL, shortcutTarget);
                addWord(shortcutTarget, shortcutTargetStringLength, finalFreq, masterQueue);
                addWord(shortcutTarget, shortcutTargetStringLength, shortcutFreq, masterQueue);
            }
        }
    }
+0 −4
Original line number Diff line number Diff line
@@ -49,10 +49,6 @@ class UnigramDictionary {
    static const int FLAG_HAS_SHORTCUT_TARGETS = 0x08;
    // Flag for bigram presence
    static const int FLAG_HAS_BIGRAMS = 0x04;
    // Flag for shortcut-only words. Some words are shortcut-only, which means they match when
    // the user types them but they don't pop in the suggestion strip, only the words they are
    // shortcuts for do.
    static const int FLAG_IS_SHORTCUT_ONLY = 0x02;

    // Attribute (bigram/shortcut) related flags:
    // Flag for presence of more attributes