Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 5ac44bdc authored by Keisuke Kuroynagi's avatar Keisuke Kuroynagi
Browse files

Move shortcut reading methods.

Moved from BinaryFormat to BinaryDictionaryTerminalAttributesReadingUtils.

Bug: 6669677

Change-Id: Ia1ab25854effbf61df37837fe26755ac7dc4d020
parent 4da287d0
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -33,6 +33,9 @@ const TaUtils::TerminalAttributeFlags TaUtils::FLAG_ATTRIBUTE_HAS_NEXT = 0x80;
// Mask for attribute probability, stored on 4 bits inside the flags byte.
const TaUtils::TerminalAttributeFlags TaUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F;
const int TaUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
// Size in bytes of the uint16 field that stores the shortcut list size. The stored size counts
// this field too, so it is subtracted to obtain the size of the list region alone.
const int TaUtils::SHORTCUT_LIST_SIZE_FIELD_SIZE = 2;
// The numeric value of the shortcut probability that means 'whitelist'.
const int TaUtils::WHITELIST_SHORTCUT_PROBABILITY = 15;

/* static */ int TaUtils::getBigramAddressAndForwardPointer(
        const BinaryDictionaryInfo *const binaryDictionaryInfo, const TerminalAttributeFlags flags,
+31 −0
Original line number Diff line number Diff line
@@ -29,6 +29,7 @@ class BinaryDictionaryTerminalAttributesReadingUtils {
 public:
    // One-byte flags value of a terminal attribute entry.
    typedef uint8_t TerminalAttributeFlags;
    // Bigram and shortcut flags are the same underlying type; the aliases only name the context.
    typedef TerminalAttributeFlags BigramFlags;
    typedef TerminalAttributeFlags ShortcutFlags;

    static AK_FORCE_INLINE TerminalAttributeFlags getFlagsAndForwardPointer(
            const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) {
@@ -59,6 +60,34 @@ class BinaryDictionaryTerminalAttributesReadingUtils {
            const BinaryDictionaryInfo *const binaryDictionaryInfo, const BigramFlags flags,
                    int *const pos);

    // Shortcuts reading methods
    // Reads the shortcut list size field located at *pos and returns the size of the shortcut
    // list region that follows it, i.e. with the size field's own bytes subtracted.
    static AK_FORCE_INLINE int getShortcutListSizeAndForwardPointer(
            const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) {
        const uint8_t *const dictRoot = binaryDictionaryInfo->getDictRoot();
        // The stored value counts the uint16 size field as well as the list region.
        const int sizeIncludingSizeField =
                ByteArrayUtils::readUint16andAdvancePosition(dictRoot, pos);
        return sizeIncludingSizeField - SHORTCUT_LIST_SIZE_FIELD_SIZE;
    }

    // Advances *pos past an entire shortcut list: first over the size field, then over the
    // list region whose size that field gives.
    static AK_FORCE_INLINE void skipShortcuts(
            const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) {
        // Kept as a separate statement: the call below also moves *pos, so it must complete
        // before *pos is read for the addition.
        const int listRegionSize =
                getShortcutListSizeAndForwardPointer(binaryDictionaryInfo, pos);
        *pos = *pos + listRegionSize;
    }

    // Returns true when the probability carried in the shortcut flags equals
    // WHITELIST_SHORTCUT_PROBABILITY.
    static AK_FORCE_INLINE bool isWhitelist(const ShortcutFlags flags) {
        const int probability = getProbabilityFromFlags(flags);
        return WHITELIST_SHORTCUT_PROBABILITY == probability;
    }

    // Reads the shortcut target string at *pos into outWord, passing maxLength as the limit, and
    // returns whatever ByteArrayUtils::readStringAndAdvancePosition() returns (presumably the
    // number of code points read -- confirm against ByteArrayUtils).
    static AK_FORCE_INLINE int readShortcutTarget(
            const BinaryDictionaryInfo *const binaryDictionaryInfo, const int maxLength,
            int *const outWord, int *const pos) {
        const uint8_t *const dictRoot = binaryDictionaryInfo->getDictRoot();
        return ByteArrayUtils::readStringAndAdvancePosition(dictRoot, maxLength, outWord, pos);
    }

 private:
    DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryTerminalAttributesReadingUtils);

@@ -70,6 +99,8 @@ class BinaryDictionaryTerminalAttributesReadingUtils {
    static const TerminalAttributeFlags FLAG_ATTRIBUTE_HAS_NEXT;
    static const TerminalAttributeFlags MASK_ATTRIBUTE_PROBABILITY;
    static const int ATTRIBUTE_ADDRESS_SHIFT;
    // Subtracted from the stored shortcut list size to exclude the size field itself.
    static const int SHORTCUT_LIST_SIZE_FIELD_SIZE;
    // Probability value that isWhitelist() treats as marking a whitelist shortcut.
    static const int WHITELIST_SHORTCUT_PROBABILITY;

    static AK_FORCE_INLINE bool isOffsetNegative(const TerminalAttributeFlags flags) {
        return (flags & FLAG_ATTRIBUTE_OFFSET_NEGATIVE) != 0;
+0 −39
Original line number Diff line number Diff line
@@ -52,14 +52,10 @@ class BinaryFormat {

    // Mask for attribute probability, stored on 4 bits inside the flags byte.
    static const int MASK_ATTRIBUTE_PROBABILITY = 0x0F;
    // The numeric value of the shortcut probability that means 'whitelist'.
    static const int WHITELIST_SHORTCUT_PROBABILITY = 15;

    // Mask and flags for attribute address type selection.
    static const int MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30;

    static const int SHORTCUT_LIST_SIZE_SIZE = 2;

    static bool hasBlacklistedOrNotAWordFlag(const int flags);
    static int getGroupCountAndForwardPointer(const uint8_t *const dict, int *pos);
    static uint8_t getFlagsAndForwardPointer(const uint8_t *const dict, int *pos);
@@ -73,9 +69,6 @@ class BinaryFormat {
            const int pos);
    static int readChildrenPosition(const uint8_t *const dict, const uint8_t flags, const int pos);
    static bool hasChildrenInFlags(const uint8_t flags);
    static int getAttributeAddressAndForwardPointer(const uint8_t *const dict, const uint8_t flags,
            int *pos);
    static int getAttributeProbabilityFromFlags(const int flags);
    static int getTerminalPosition(const uint8_t *const root, const int *const inWord,
            const int length, const bool forceLowerCaseSearch);
    static int getWordAtAddress(const uint8_t *const root, const int address, const int maxDepth,
@@ -260,38 +253,6 @@ inline bool BinaryFormat::hasChildrenInFlags(const uint8_t flags) {
    return (FLAG_GROUP_ADDRESS_TYPE_NOADDRESS != (MASK_GROUP_ADDRESS_TYPE & flags));
}

// Reads a 1-, 2- or 3-byte big-endian offset starting at *pos (the width is selected by the
// address-type bits of flags), moves *pos past the bytes read, and returns the position the
// offset was read from minus or plus that offset, depending on whether
// FLAG_ATTRIBUTE_OFFSET_NEGATIVE is set in flags. When the address-type bits match none of the
// three widths, *pos is left untouched and its current value is returned.
AK_FORCE_INLINE int BinaryFormat::getAttributeAddressAndForwardPointer(const uint8_t *const dict,
        const uint8_t flags, int *pos) {
    const int start = *pos;
    const int addressType = MASK_ATTRIBUTE_ADDRESS_TYPE & flags;
    int delta = 0;
    if (FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE == addressType) {
        delta = dict[start];
        *pos = start + 1;
    } else if (FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES == addressType) {
        delta = (dict[start] << 8) + dict[start + 1];
        *pos = start + 2;
    } else if (FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES == addressType) {
        delta = (dict[start] << 16) + (dict[start + 1] << 8) + dict[start + 2];
        *pos = start + 3;
    }
    // The offset is relative to where the address field begins, not to where it ends.
    const bool isNegative = 0 != (FLAG_ATTRIBUTE_OFFSET_NEGATIVE & flags);
    return isNegative ? start - delta : start + delta;
}

// Extracts the attribute probability: the bits of flags selected by MASK_ATTRIBUTE_PROBABILITY.
inline int BinaryFormat::getAttributeProbabilityFromFlags(const int flags) {
    return MASK_ATTRIBUTE_PROBABILITY & flags;
}

// This function gets the byte position of the last chargroup of the exact matching word in the
// dictionary. If no match is found, it returns NOT_VALID_WORD.
AK_FORCE_INLINE int BinaryFormat::getTerminalPosition(const uint8_t *const root,
+6 −6
Original line number Diff line number Diff line
@@ -29,15 +29,15 @@ class ShortcutUtils {
            int outputWordIndex, const int finalScore, int *const outputCodePoints,
            int *const frequencies, int *const outputTypes, const bool sameAsTyped) {
        TerminalAttributes::ShortcutIterator iterator = terminalAttributes->getShortcutIterator();
        while (iterator.hasNextShortcutTarget() && outputWordIndex < MAX_RESULTS) {
        int shortcutTarget[MAX_WORD_LENGTH];
            int shortcutProbability;
            const int shortcutTargetStringLength = iterator.getNextShortcutTarget(
                    MAX_WORD_LENGTH, shortcutTarget, &shortcutProbability);
        while (iterator.hasNextShortcutTarget() && outputWordIndex < MAX_RESULTS) {
            bool isWhilelist;
            int shortcutTargetStringLength;
            iterator.nextShortcutTarget(MAX_WORD_LENGTH, shortcutTarget,
                    &shortcutTargetStringLength, &isWhilelist);
            int shortcutScore;
            int kind;
            if (shortcutProbability == BinaryFormat::WHITELIST_SHORTCUT_PROBABILITY
                    && sameAsTyped) {
            if (isWhilelist && sameAsTyped) {
                shortcutScore = S_INT_MAX;
                kind = Dictionary::KIND_WHITELIST;
            } else {
+37 −30
Original line number Diff line number Diff line
@@ -20,6 +20,7 @@
#include <stdint.h>

#include "suggest/core/dictionary/binary_dictionary_info.h"
#include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h"
#include "suggest/core/dictionary/binary_format.h"

namespace latinime {
@@ -33,60 +34,66 @@ class TerminalAttributes {
 public:
    class ShortcutIterator {
     public:
        ShortcutIterator(const BinaryDictionaryInfo *const binaryDictionaryInfo, const int pos,
                const uint8_t flags)
                : mBinaryDicitionaryInfo(binaryDictionaryInfo), mPos(pos),
                  mHasNextShortcutTarget(0 != (flags & BinaryFormat::FLAG_HAS_SHORTCUT_TARGETS)) {
        }
        ShortcutIterator(const BinaryDictionaryInfo *const binaryDictionaryInfo,
                const int shortcutPos, const bool hasShortcutList)
                : mBinaryDictionaryInfo(binaryDictionaryInfo), mPos(shortcutPos),
                  mHasNextShortcutTarget(hasShortcutList) {}

        // Returns whether another shortcut target remains to be read by this iterator.
        inline bool hasNextShortcutTarget() const {
            return mHasNextShortcutTarget;
        }

        // Gets the shortcut target itself as an int string. For parameters and return value
        // see BinaryFormat::getWordAtAddress.
        inline int getNextShortcutTarget(const int maxDepth, int *outWord, int *outFreq) {
            const int shortcutFlags = BinaryFormat::getFlagsAndForwardPointer(
                    mBinaryDicitionaryInfo->getDictRoot(), &mPos);
            mHasNextShortcutTarget = 0 != (shortcutFlags & BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT);
            unsigned int i;
            for (i = 0; i < MAX_WORD_LENGTH; ++i) {
                const int codePoint = BinaryFormat::getCodePointAndForwardPointer(
                        mBinaryDicitionaryInfo->getDictRoot(), &mPos);
                if (NOT_A_CODE_POINT == codePoint) break;
                outWord[i] = codePoint;
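        // Reads the next shortcut target as an int string into outTarget, stores its length in
        // outTargetLength, and stores whether it is a whitelist entry in outIsWhitelist. Null
        // outIsWhitelist / outTargetLength are tolerated; the target is read only when
        // outTargetLength is non-null.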
        AK_FORCE_INLINE void nextShortcutTarget(
                const int maxDepth, int *const outTarget, int *const outTargetLength,
                bool *const outIsWhitelist) {
            const BinaryDictionaryTerminalAttributesReadingUtils::ShortcutFlags flags =
                    BinaryDictionaryTerminalAttributesReadingUtils::getFlagsAndForwardPointer(
                            mBinaryDictionaryInfo, &mPos);
            mHasNextShortcutTarget =
                    BinaryDictionaryTerminalAttributesReadingUtils::hasNext(flags);
            if (outIsWhitelist) {
                *outIsWhitelist =
                        BinaryDictionaryTerminalAttributesReadingUtils::isWhitelist(flags);
            }
            if (outTargetLength) {
                *outTargetLength =
                        BinaryDictionaryTerminalAttributesReadingUtils::readShortcutTarget(
                                mBinaryDictionaryInfo, maxDepth, outTarget, &mPos);
            }
            *outFreq = BinaryFormat::getAttributeProbabilityFromFlags(shortcutFlags);
            return i;
        }

     private:
        const BinaryDictionaryInfo *const mBinaryDicitionaryInfo;
        const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
        int mPos;
        bool mHasNextShortcutTarget;
    };

    TerminalAttributes(const BinaryDictionaryInfo *const binaryDicitonaryInfo,
            const uint8_t flags, const int pos)
            : mBinaryDicitionaryInfo(binaryDicitonaryInfo), mFlags(flags), mStartPos(pos) {
    }
    TerminalAttributes(const BinaryDictionaryInfo *const binaryDictionaryInfo,
            const uint8_t nodeFlags, const int shortcutPos)
            : mBinaryDictionaryInfo(binaryDictionaryInfo),
              mNodeFlags(nodeFlags), mShortcutListSizePos(shortcutPos) {}

    inline ShortcutIterator getShortcutIterator() const {
        // The size of the shortcuts is stored here so that the whole shortcut chunk can be
        // skipped quickly, so we ignore it.
        return ShortcutIterator(
                mBinaryDicitionaryInfo, mStartPos + BinaryFormat::SHORTCUT_LIST_SIZE_SIZE, mFlags);
        int shortcutPos = mShortcutListSizePos;
        BinaryDictionaryTerminalAttributesReadingUtils::getShortcutListSizeAndForwardPointer(
                mBinaryDictionaryInfo, &shortcutPos);
        const bool hasShortcutList = 0 != (mNodeFlags & BinaryFormat::FLAG_HAS_SHORTCUT_TARGETS);
        return ShortcutIterator(mBinaryDictionaryInfo, shortcutPos, hasShortcutList);
    }

    bool isBlacklistedOrNotAWord() const {
        return BinaryFormat::hasBlacklistedOrNotAWordFlag(mFlags);
        return BinaryFormat::hasBlacklistedOrNotAWordFlag(mNodeFlags);
    }

 private:
    DISALLOW_IMPLICIT_CONSTRUCTORS(TerminalAttributes);
    const BinaryDictionaryInfo *const mBinaryDicitionaryInfo;
    const uint8_t mFlags;
    const int mStartPos;
    const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
    const uint8_t mNodeFlags;
    const int mShortcutListSizePos;
};
} // namespace latinime
#endif // LATINIME_TERMINAL_ATTRIBUTES_H