Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 0933353c authored by Keisuke Kuroynagi's avatar Keisuke Kuroynagi Committed by Android (Google) Code Review
Browse files

Merge "Read version 3 dictionary header."

parents db296305 5ae8722b
Loading
Loading
Loading
Loading
+23 −20
Original line number Diff line number Diff line
@@ -31,9 +31,9 @@ const int BinaryDictionaryFormatUtils::DICTIONARY_MINIMUM_SIZE = 4;
// The versions of Latin IME that only handle format version 1 only test for the magic
// number, so we had to change it so that version 2 files would be rejected by older
// implementations. On this occasion, we made the magic number 32 bits long.
const uint32_t BinaryDictionaryFormatUtils::FORMAT_VERSION_2_MAGIC_NUMBER = 0x9BC13AFE;
const uint32_t BinaryDictionaryFormatUtils::HEADER_VERSION_2_MAGIC_NUMBER = 0x9BC13AFE;
// Magic number (4 bytes), version (2 bytes), options (2 bytes), header size (4 bytes) = 12
const int BinaryDictionaryFormatUtils::FORMAT_VERSION_2_MINIMUM_SIZE = 12;
const int BinaryDictionaryFormatUtils::HEADER_VERSION_2_MINIMUM_SIZE = 12;

/* static */ BinaryDictionaryFormatUtils::FORMAT_VERSION
        BinaryDictionaryFormatUtils::detectFormatVersion(const uint8_t *const dict,
@@ -46,20 +46,23 @@ const int BinaryDictionaryFormatUtils::FORMAT_VERSION_2_MINIMUM_SIZE = 12;
    }
    const uint32_t magicNumber = ByteArrayUtils::readUint32(dict, 0);
    switch (magicNumber) {
    case FORMAT_VERSION_2_MAGIC_NUMBER:
        // Version 2 dictionaries are at least 12 bytes long.
        // If this dictionary has the version 2 magic number but is less than 12 bytes long,
        case HEADER_VERSION_2_MAGIC_NUMBER:
            // Version 2 headers are at least 12 bytes long.
            // If this header has the version 2 magic number but is less than 12 bytes long,
            // then it's an unknown format and we need to avoid confidently reading the next bytes.
        if (dictSize < FORMAT_VERSION_2_MINIMUM_SIZE) {
            if (dictSize < HEADER_VERSION_2_MINIMUM_SIZE) {
                return UNKNOWN_VERSION;
            }
        // Format 2 header is as follows:
            // Version 2 header is as follows:
            // Magic number (4 bytes) 0x9B 0xC1 0x3A 0xFE
        // Version number (2 bytes) 0x00 0x02
            // Version number (2 bytes)
            // Options (2 bytes)
            // Header size (4 bytes) : integer, big endian
            if (ByteArrayUtils::readUint16(dict, 4) == 2) {
                return VERSION_2;
            } else if (ByteArrayUtils::readUint16(dict, 4) == 3) {
                // TODO: Support version 3 dictionary.
                return UNKNOWN_VERSION;
            } else {
                return UNKNOWN_VERSION;
            }
+4 −4
Original line number Diff line number Diff line
@@ -33,9 +33,9 @@ namespace latinime {
 */
class BinaryDictionaryFormatUtils {
 public:
    // TODO: Support version 3 format.
    enum FORMAT_VERSION {
        VERSION_2 = 1,
        VERSION_2,
        VERSION_3,
        UNKNOWN_VERSION
    };

@@ -45,8 +45,8 @@ class BinaryDictionaryFormatUtils {
    DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryFormatUtils);

    static const int DICTIONARY_MINIMUM_SIZE;
    static const uint32_t FORMAT_VERSION_2_MAGIC_NUMBER;
    static const int FORMAT_VERSION_2_MINIMUM_SIZE;
    static const uint32_t HEADER_VERSION_2_MAGIC_NUMBER;
    static const int HEADER_VERSION_2_MINIMUM_SIZE;
};
} // namespace latinime
#endif /* LATINIME_BINARY_DICTIONARY_FORMAT_UTILS_H */
+16 −12
Original line number Diff line number Diff line
@@ -26,10 +26,10 @@ namespace latinime {

const int BinaryDictionaryHeaderReadingUtils::MAX_OPTION_KEY_LENGTH = 256;

const int BinaryDictionaryHeaderReadingUtils::VERSION_2_MAGIC_NUMBER_SIZE = 4;
const int BinaryDictionaryHeaderReadingUtils::VERSION_2_DICTIONARY_VERSION_SIZE = 2;
const int BinaryDictionaryHeaderReadingUtils::VERSION_2_DICTIONARY_FLAG_SIZE = 2;
const int BinaryDictionaryHeaderReadingUtils::VERSION_2_DICTIONARY_HEADER_SIZE_SIZE = 4;
const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_MAGIC_NUMBER_SIZE = 4;
const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_DICTIONARY_VERSION_SIZE = 2;
const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_FLAG_SIZE = 2;
const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_SIZE_FIELD_SIZE = 4;

const BinaryDictionaryHeaderReadingUtils::DictionaryFlags
        BinaryDictionaryHeaderReadingUtils::NO_FLAGS = 0;
@@ -45,13 +45,13 @@ const BinaryDictionaryHeaderReadingUtils::DictionaryFlags

/* static */ int BinaryDictionaryHeaderReadingUtils::getHeaderSize(
        const BinaryDictionaryInfo *const binaryDictionaryInfo) {
    switch (binaryDictionaryInfo->getFormat()) {
        case BinaryDictionaryFormatUtils::VERSION_2:
    switch (getHeaderVersion(binaryDictionaryInfo->getFormat())) {
        case HEADER_VERSION_2:
            // See the format of the header in the comment in
            // BinaryDictionaryFormatUtils::detectFormatVersion()
            return ByteArrayUtils::readUint32(binaryDictionaryInfo->getDictBuf(),
                    VERSION_2_MAGIC_NUMBER_SIZE + VERSION_2_DICTIONARY_VERSION_SIZE
                            + VERSION_2_DICTIONARY_FLAG_SIZE);
                    VERSION_2_HEADER_MAGIC_NUMBER_SIZE + VERSION_2_HEADER_DICTIONARY_VERSION_SIZE
                            + VERSION_2_HEADER_FLAG_SIZE);
        default:
            return S_INT_MAX;
    }
@@ -60,10 +60,10 @@ const BinaryDictionaryHeaderReadingUtils::DictionaryFlags
/* static */ BinaryDictionaryHeaderReadingUtils::DictionaryFlags
        BinaryDictionaryHeaderReadingUtils::getFlags(
                const BinaryDictionaryInfo *const binaryDictionaryInfo) {
    switch (binaryDictionaryInfo->getFormat()) {
        case BinaryDictionaryFormatUtils::VERSION_2:
    switch (getHeaderVersion(binaryDictionaryInfo->getFormat())) {
        case HEADER_VERSION_2:
            return ByteArrayUtils::readUint16(binaryDictionaryInfo->getDictBuf(),
                    VERSION_2_MAGIC_NUMBER_SIZE + VERSION_2_DICTIONARY_VERSION_SIZE);
                    VERSION_2_HEADER_MAGIC_NUMBER_SIZE + VERSION_2_HEADER_DICTIONARY_VERSION_SIZE);
        default:
            return NO_FLAGS;
    }
@@ -73,11 +73,15 @@ const BinaryDictionaryHeaderReadingUtils::DictionaryFlags
/* static */ bool BinaryDictionaryHeaderReadingUtils::readHeaderValue(
        const BinaryDictionaryInfo *const binaryDictionaryInfo,
        const char *const key, int *outValue, const int outValueSize) {
    if (outValueSize <= 0 || !hasHeaderAttributes(binaryDictionaryInfo->getFormat())) {
    if (outValueSize <= 0) {
        return false;
    }
    const int headerSize = getHeaderSize(binaryDictionaryInfo);
    int pos = getHeaderOptionsPosition(binaryDictionaryInfo->getFormat());
    if (pos == NOT_A_DICT_POS) {
        // The header doesn't have header options.
        return false;
    }
    while (pos < headerSize) {
        if(ByteArrayUtils::compareStringInBufferWithCharArray(
                binaryDictionaryInfo->getDictBuf(), key, headerSize - pos, &pos) == 0) {
+27 −22
Original line number Diff line number Diff line
@@ -48,27 +48,15 @@ class BinaryDictionaryHeaderReadingUtils {
        return (flags & FRENCH_LIGATURE_PROCESSING_FLAG) != 0;
    }

    static AK_FORCE_INLINE bool hasHeaderAttributes(
            const BinaryDictionaryFormatUtils::FORMAT_VERSION format) {
        // Only format 2 and above have header attributes as {key,value} string pairs.
        switch (format) {
        case BinaryDictionaryFormatUtils::VERSION_2:
            return  true;
            break;
        default:
            return false;
        }
    }

    static AK_FORCE_INLINE int getHeaderOptionsPosition(
            const BinaryDictionaryFormatUtils::FORMAT_VERSION format) {
        switch (format) {
        case BinaryDictionaryFormatUtils::VERSION_2:
            return VERSION_2_MAGIC_NUMBER_SIZE + VERSION_2_DICTIONARY_VERSION_SIZE
                    + VERSION_2_DICTIONARY_FLAG_SIZE + VERSION_2_DICTIONARY_HEADER_SIZE_SIZE;
            const BinaryDictionaryFormatUtils::FORMAT_VERSION dictionaryFormat) {
        switch (getHeaderVersion(dictionaryFormat)) {
        case HEADER_VERSION_2:
            return VERSION_2_HEADER_MAGIC_NUMBER_SIZE + VERSION_2_HEADER_DICTIONARY_VERSION_SIZE
                    + VERSION_2_HEADER_FLAG_SIZE + VERSION_2_HEADER_SIZE_FIELD_SIZE;
            break;
        default:
            return 0;
            return NOT_A_DICT_POS;
        }
    }

@@ -82,10 +70,15 @@ class BinaryDictionaryHeaderReadingUtils {
 private:
    DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryHeaderReadingUtils);

    static const int VERSION_2_MAGIC_NUMBER_SIZE;
    static const int VERSION_2_DICTIONARY_VERSION_SIZE;
    static const int VERSION_2_DICTIONARY_FLAG_SIZE;
    static const int VERSION_2_DICTIONARY_HEADER_SIZE_SIZE;
    // Layout version of the dictionary header, kept separate from the dictionary format
    // version. getHeaderVersion() maps a dictionary format version onto one of these values.
    enum HEADER_VERSION {
        // Header layout selected for both the VERSION_2 and VERSION_3 dictionary formats.
        HEADER_VERSION_2,
        // Selected for every format version that has no known header layout.
        UNKNOWN_HEADER_VERSION
    };

    static const int VERSION_2_HEADER_MAGIC_NUMBER_SIZE;
    static const int VERSION_2_HEADER_DICTIONARY_VERSION_SIZE;
    static const int VERSION_2_HEADER_FLAG_SIZE;
    static const int VERSION_2_HEADER_SIZE_FIELD_SIZE;

    static const DictionaryFlags NO_FLAGS;
    // Flags for special processing
@@ -95,6 +88,18 @@ class BinaryDictionaryHeaderReadingUtils {
    static const DictionaryFlags SUPPORTS_DYNAMIC_UPDATE_FLAG;
    static const DictionaryFlags FRENCH_LIGATURE_PROCESSING_FLAG;
    static const DictionaryFlags CONTAINS_BIGRAMS_FLAG;

    // Maps a dictionary format version onto the header layout version used to read it.
    // VERSION_2 and VERSION_3 dictionaries share the version 2 header layout; any other
    // format version yields UNKNOWN_HEADER_VERSION.
    static HEADER_VERSION getHeaderVersion(
            const BinaryDictionaryFormatUtils::FORMAT_VERSION formatVersion) {
        const bool usesVersion2Header =
                formatVersion == BinaryDictionaryFormatUtils::VERSION_2
                        || formatVersion == BinaryDictionaryFormatUtils::VERSION_3;
        return usesVersion2Header ? HEADER_VERSION_2 : UNKNOWN_HEADER_VERSION;
    }
};
}
#endif /* LATINIME_DICTIONARY_HEADER_READING_UTILS_H */
+3 −0
Original line number Diff line number Diff line
@@ -32,6 +32,9 @@ class DictionaryStructurePolicyFactory {
        switch (dictionaryFormat) {
            case BinaryDictionaryFormatUtils::VERSION_2:
                return PatriciaTriePolicy::getInstance();
            case BinaryDictionaryFormatUtils::VERSION_3:
                // TODO: support version 3 dictionaries.
                return 0;
            default:
                ASSERT(false);
                return 0;