Loading native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.cpp +23 −20 Original line number Diff line number Diff line Loading @@ -31,9 +31,9 @@ const int BinaryDictionaryFormatUtils::DICTIONARY_MINIMUM_SIZE = 4; // The versions of Latin IME that only handle format version 1 only test for the magic // number, so we had to change it so that version 2 files would be rejected by older // implementations. On this occasion, we made the magic number 32 bits long. const uint32_t BinaryDictionaryFormatUtils::FORMAT_VERSION_2_MAGIC_NUMBER = 0x9BC13AFE; const uint32_t BinaryDictionaryFormatUtils::HEADER_VERSION_2_MAGIC_NUMBER = 0x9BC13AFE; // Magic number (4 bytes), version (2 bytes), options (2 bytes), header size (4 bytes) = 12 const int BinaryDictionaryFormatUtils::FORMAT_VERSION_2_MINIMUM_SIZE = 12; const int BinaryDictionaryFormatUtils::HEADER_VERSION_2_MINIMUM_SIZE = 12; /* static */ BinaryDictionaryFormatUtils::FORMAT_VERSION BinaryDictionaryFormatUtils::detectFormatVersion(const uint8_t *const dict, Loading @@ -46,20 +46,23 @@ const int BinaryDictionaryFormatUtils::FORMAT_VERSION_2_MINIMUM_SIZE = 12; } const uint32_t magicNumber = ByteArrayUtils::readUint32(dict, 0); switch (magicNumber) { case FORMAT_VERSION_2_MAGIC_NUMBER: // Version 2 dictionaries are at least 12 bytes long. // If this dictionary has the version 2 magic number but is less than 12 bytes long, case HEADER_VERSION_2_MAGIC_NUMBER: // Version 2 header are at least 12 bytes long. // If this header has the version 2 magic number but is less than 12 bytes long, // then it's an unknown format and we need to avoid confidently reading the next bytes. if (dictSize < FORMAT_VERSION_2_MINIMUM_SIZE) { if (dictSize < HEADER_VERSION_2_MINIMUM_SIZE) { return UNKNOWN_VERSION; } // Format 2 header is as follows: // Version 2 header is as follows: // Magic number (4 bytes) 0x9B 0xC1 0x3A 0xFE // Version number (2 bytes) 0x00 0x02 // Version number (2 bytes) // Options (2 bytes) // Header size (4 bytes) : integer, big endian if (ByteArrayUtils::readUint16(dict, 4) == 2) { return VERSION_2; } else if (ByteArrayUtils::readUint16(dict, 4) == 3) { // TODO: Support version 3 dictionary. return UNKNOWN_VERSION; } else { return UNKNOWN_VERSION; } Loading native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.h +4 −4 Original line number Diff line number Diff line Loading @@ -33,9 +33,9 @@ namespace latinime { */ class BinaryDictionaryFormatUtils { public: // TODO: Support version 3 format. enum FORMAT_VERSION { VERSION_2 = 1, VERSION_2, VERSION_3, UNKNOWN_VERSION }; Loading @@ -45,8 +45,8 @@ class BinaryDictionaryFormatUtils { DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryFormatUtils); static const int DICTIONARY_MINIMUM_SIZE; static const uint32_t FORMAT_VERSION_2_MAGIC_NUMBER; static const int FORMAT_VERSION_2_MINIMUM_SIZE; static const uint32_t HEADER_VERSION_2_MAGIC_NUMBER; static const int HEADER_VERSION_2_MINIMUM_SIZE; }; } // namespace latinime #endif /* LATINIME_BINARY_DICTIONARY_FORMAT_UTILS_H */ native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp +16 −12 Original line number Diff line number Diff line Loading @@ -26,10 +26,10 @@ namespace latinime { const int BinaryDictionaryHeaderReadingUtils::MAX_OPTION_KEY_LENGTH = 256; const int BinaryDictionaryHeaderReadingUtils::VERSION_2_MAGIC_NUMBER_SIZE = 4; const int BinaryDictionaryHeaderReadingUtils::VERSION_2_DICTIONARY_VERSION_SIZE = 2; const int BinaryDictionaryHeaderReadingUtils::VERSION_2_DICTIONARY_FLAG_SIZE = 2; const int BinaryDictionaryHeaderReadingUtils::VERSION_2_DICTIONARY_HEADER_SIZE_SIZE = 4; const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_MAGIC_NUMBER_SIZE = 4; const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_DICTIONARY_VERSION_SIZE = 2; const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_FLAG_SIZE = 2; const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_SIZE_FIELD_SIZE = 4; const BinaryDictionaryHeaderReadingUtils::DictionaryFlags BinaryDictionaryHeaderReadingUtils::NO_FLAGS = 0; Loading @@ -45,13 +45,13 @@ const BinaryDictionaryHeaderReadingUtils::DictionaryFlags /* static */ int BinaryDictionaryHeaderReadingUtils::getHeaderSize( const BinaryDictionaryInfo *const binaryDictionaryInfo) { switch (binaryDictionaryInfo->getFormat()) { case BinaryDictionaryFormatUtils::VERSION_2: switch (getHeaderVersion(binaryDictionaryInfo->getFormat())) { case HEADER_VERSION_2: // See the format of the header in the comment in // BinaryDictionaryFormatUtils::detectFormatVersion() return ByteArrayUtils::readUint32(binaryDictionaryInfo->getDictBuf(), VERSION_2_MAGIC_NUMBER_SIZE + VERSION_2_DICTIONARY_VERSION_SIZE + VERSION_2_DICTIONARY_FLAG_SIZE); VERSION_2_HEADER_MAGIC_NUMBER_SIZE + VERSION_2_HEADER_DICTIONARY_VERSION_SIZE + VERSION_2_HEADER_FLAG_SIZE); default: return S_INT_MAX; } Loading @@ -60,10 +60,10 @@ const BinaryDictionaryHeaderReadingUtils::DictionaryFlags /* static */ BinaryDictionaryHeaderReadingUtils::DictionaryFlags BinaryDictionaryHeaderReadingUtils::getFlags( const BinaryDictionaryInfo *const binaryDictionaryInfo) { switch (binaryDictionaryInfo->getFormat()) { case BinaryDictionaryFormatUtils::VERSION_2: switch (getHeaderVersion(binaryDictionaryInfo->getFormat())) { case HEADER_VERSION_2: return ByteArrayUtils::readUint16(binaryDictionaryInfo->getDictBuf(), VERSION_2_MAGIC_NUMBER_SIZE + VERSION_2_DICTIONARY_VERSION_SIZE); VERSION_2_HEADER_MAGIC_NUMBER_SIZE + VERSION_2_HEADER_DICTIONARY_VERSION_SIZE); default: return NO_FLAGS; } Loading @@ -73,11 +73,15 @@ const BinaryDictionaryHeaderReadingUtils::DictionaryFlags /* static */ bool BinaryDictionaryHeaderReadingUtils::readHeaderValue( const BinaryDictionaryInfo *const binaryDictionaryInfo, const char *const key, int *outValue, const int outValueSize) { if (outValueSize <= 0 || !hasHeaderAttributes(binaryDictionaryInfo->getFormat())) { if (outValueSize <= 0) { return false; } const int headerSize = getHeaderSize(binaryDictionaryInfo); int pos = getHeaderOptionsPosition(binaryDictionaryInfo->getFormat()); if (pos == NOT_A_DICT_POS) { // The header doesn't have header options. return false; } while (pos < headerSize) { if(ByteArrayUtils::compareStringInBufferWithCharArray( binaryDictionaryInfo->getDictBuf(), key, headerSize - pos, &pos) == 0) { Loading native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.h +27 −22 Original line number Diff line number Diff line Loading @@ -48,27 +48,15 @@ class BinaryDictionaryHeaderReadingUtils { return (flags & FRENCH_LIGATURE_PROCESSING_FLAG) != 0; } static AK_FORCE_INLINE bool hasHeaderAttributes( const BinaryDictionaryFormatUtils::FORMAT_VERSION format) { // Only format 2 and above have header attributes as {key,value} string pairs. switch (format) { case BinaryDictionaryFormatUtils::VERSION_2: return true; break; default: return false; } } static AK_FORCE_INLINE int getHeaderOptionsPosition( const BinaryDictionaryFormatUtils::FORMAT_VERSION format) { switch (format) { case BinaryDictionaryFormatUtils::VERSION_2: return VERSION_2_MAGIC_NUMBER_SIZE + VERSION_2_DICTIONARY_VERSION_SIZE + VERSION_2_DICTIONARY_FLAG_SIZE + VERSION_2_DICTIONARY_HEADER_SIZE_SIZE; const BinaryDictionaryFormatUtils::FORMAT_VERSION dictionaryFormat) { switch (getHeaderVersion(dictionaryFormat)) { case HEADER_VERSION_2: return VERSION_2_HEADER_MAGIC_NUMBER_SIZE + VERSION_2_HEADER_DICTIONARY_VERSION_SIZE + VERSION_2_HEADER_FLAG_SIZE + VERSION_2_HEADER_SIZE_FIELD_SIZE; break; default: return 0; return NOT_A_DICT_POS; } } Loading @@ -82,10 +70,15 @@ class BinaryDictionaryHeaderReadingUtils { private: DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryHeaderReadingUtils); static const int VERSION_2_MAGIC_NUMBER_SIZE; static const int VERSION_2_DICTIONARY_VERSION_SIZE; static const int VERSION_2_DICTIONARY_FLAG_SIZE; static const int VERSION_2_DICTIONARY_HEADER_SIZE_SIZE; enum HEADER_VERSION { HEADER_VERSION_2, UNKNOWN_HEADER_VERSION }; static const int VERSION_2_HEADER_MAGIC_NUMBER_SIZE; static const int VERSION_2_HEADER_DICTIONARY_VERSION_SIZE; static const int VERSION_2_HEADER_FLAG_SIZE; static const int VERSION_2_HEADER_SIZE_FIELD_SIZE; static const DictionaryFlags NO_FLAGS; // Flags for special processing Loading @@ -95,6 +88,18 @@ class BinaryDictionaryHeaderReadingUtils { static const DictionaryFlags SUPPORTS_DYNAMIC_UPDATE_FLAG; static const DictionaryFlags FRENCH_LIGATURE_PROCESSING_FLAG; static const DictionaryFlags CONTAINS_BIGRAMS_FLAG; static HEADER_VERSION getHeaderVersion( const BinaryDictionaryFormatUtils::FORMAT_VERSION formatVersion) { switch(formatVersion) { case BinaryDictionaryFormatUtils::VERSION_2: // Fall through case BinaryDictionaryFormatUtils::VERSION_3: return HEADER_VERSION_2; default: return UNKNOWN_HEADER_VERSION; } } }; } #endif /* LATINIME_DICTIONARY_HEADER_READING_UTILS_H */ native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h +3 −0 Original line number Diff line number Diff line Loading @@ -32,6 +32,9 @@ class DictionaryStructurePolicyFactory { switch (dictionaryFormat) { case BinaryDictionaryFormatUtils::VERSION_2: return PatriciaTriePolicy::getInstance(); case BinaryDictionaryFormatUtils::VERSION_3: // TODO: support version 3 dictionaries. return 0; default: ASSERT(false); return 0; Loading Loading
native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.cpp +23 −20 Original line number Diff line number Diff line Loading @@ -31,9 +31,9 @@ const int BinaryDictionaryFormatUtils::DICTIONARY_MINIMUM_SIZE = 4; // The versions of Latin IME that only handle format version 1 only test for the magic // number, so we had to change it so that version 2 files would be rejected by older // implementations. On this occasion, we made the magic number 32 bits long. const uint32_t BinaryDictionaryFormatUtils::FORMAT_VERSION_2_MAGIC_NUMBER = 0x9BC13AFE; const uint32_t BinaryDictionaryFormatUtils::HEADER_VERSION_2_MAGIC_NUMBER = 0x9BC13AFE; // Magic number (4 bytes), version (2 bytes), options (2 bytes), header size (4 bytes) = 12 const int BinaryDictionaryFormatUtils::FORMAT_VERSION_2_MINIMUM_SIZE = 12; const int BinaryDictionaryFormatUtils::HEADER_VERSION_2_MINIMUM_SIZE = 12; /* static */ BinaryDictionaryFormatUtils::FORMAT_VERSION BinaryDictionaryFormatUtils::detectFormatVersion(const uint8_t *const dict, Loading @@ -46,20 +46,23 @@ const int BinaryDictionaryFormatUtils::FORMAT_VERSION_2_MINIMUM_SIZE = 12; } const uint32_t magicNumber = ByteArrayUtils::readUint32(dict, 0); switch (magicNumber) { case FORMAT_VERSION_2_MAGIC_NUMBER: // Version 2 dictionaries are at least 12 bytes long. // If this dictionary has the version 2 magic number but is less than 12 bytes long, case HEADER_VERSION_2_MAGIC_NUMBER: // Version 2 header are at least 12 bytes long. // If this header has the version 2 magic number but is less than 12 bytes long, // then it's an unknown format and we need to avoid confidently reading the next bytes. if (dictSize < FORMAT_VERSION_2_MINIMUM_SIZE) { if (dictSize < HEADER_VERSION_2_MINIMUM_SIZE) { return UNKNOWN_VERSION; } // Format 2 header is as follows: // Version 2 header is as follows: // Magic number (4 bytes) 0x9B 0xC1 0x3A 0xFE // Version number (2 bytes) 0x00 0x02 // Version number (2 bytes) // Options (2 bytes) // Header size (4 bytes) : integer, big endian if (ByteArrayUtils::readUint16(dict, 4) == 2) { return VERSION_2; } else if (ByteArrayUtils::readUint16(dict, 4) == 3) { // TODO: Support version 3 dictionary. return UNKNOWN_VERSION; } else { return UNKNOWN_VERSION; } Loading
native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.h +4 −4 Original line number Diff line number Diff line Loading @@ -33,9 +33,9 @@ namespace latinime { */ class BinaryDictionaryFormatUtils { public: // TODO: Support version 3 format. enum FORMAT_VERSION { VERSION_2 = 1, VERSION_2, VERSION_3, UNKNOWN_VERSION }; Loading @@ -45,8 +45,8 @@ class BinaryDictionaryFormatUtils { DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryFormatUtils); static const int DICTIONARY_MINIMUM_SIZE; static const uint32_t FORMAT_VERSION_2_MAGIC_NUMBER; static const int FORMAT_VERSION_2_MINIMUM_SIZE; static const uint32_t HEADER_VERSION_2_MAGIC_NUMBER; static const int HEADER_VERSION_2_MINIMUM_SIZE; }; } // namespace latinime #endif /* LATINIME_BINARY_DICTIONARY_FORMAT_UTILS_H */
native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp +16 −12 Original line number Diff line number Diff line Loading @@ -26,10 +26,10 @@ namespace latinime { const int BinaryDictionaryHeaderReadingUtils::MAX_OPTION_KEY_LENGTH = 256; const int BinaryDictionaryHeaderReadingUtils::VERSION_2_MAGIC_NUMBER_SIZE = 4; const int BinaryDictionaryHeaderReadingUtils::VERSION_2_DICTIONARY_VERSION_SIZE = 2; const int BinaryDictionaryHeaderReadingUtils::VERSION_2_DICTIONARY_FLAG_SIZE = 2; const int BinaryDictionaryHeaderReadingUtils::VERSION_2_DICTIONARY_HEADER_SIZE_SIZE = 4; const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_MAGIC_NUMBER_SIZE = 4; const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_DICTIONARY_VERSION_SIZE = 2; const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_FLAG_SIZE = 2; const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_SIZE_FIELD_SIZE = 4; const BinaryDictionaryHeaderReadingUtils::DictionaryFlags BinaryDictionaryHeaderReadingUtils::NO_FLAGS = 0; Loading @@ -45,13 +45,13 @@ const BinaryDictionaryHeaderReadingUtils::DictionaryFlags /* static */ int BinaryDictionaryHeaderReadingUtils::getHeaderSize( const BinaryDictionaryInfo *const binaryDictionaryInfo) { switch (binaryDictionaryInfo->getFormat()) { case BinaryDictionaryFormatUtils::VERSION_2: switch (getHeaderVersion(binaryDictionaryInfo->getFormat())) { case HEADER_VERSION_2: // See the format of the header in the comment in // BinaryDictionaryFormatUtils::detectFormatVersion() return ByteArrayUtils::readUint32(binaryDictionaryInfo->getDictBuf(), VERSION_2_MAGIC_NUMBER_SIZE + VERSION_2_DICTIONARY_VERSION_SIZE + VERSION_2_DICTIONARY_FLAG_SIZE); VERSION_2_HEADER_MAGIC_NUMBER_SIZE + VERSION_2_HEADER_DICTIONARY_VERSION_SIZE + VERSION_2_HEADER_FLAG_SIZE); default: return S_INT_MAX; } Loading @@ -60,10 +60,10 @@ const BinaryDictionaryHeaderReadingUtils::DictionaryFlags /* static */ BinaryDictionaryHeaderReadingUtils::DictionaryFlags BinaryDictionaryHeaderReadingUtils::getFlags( const BinaryDictionaryInfo *const binaryDictionaryInfo) { switch (binaryDictionaryInfo->getFormat()) { case BinaryDictionaryFormatUtils::VERSION_2: switch (getHeaderVersion(binaryDictionaryInfo->getFormat())) { case HEADER_VERSION_2: return ByteArrayUtils::readUint16(binaryDictionaryInfo->getDictBuf(), VERSION_2_MAGIC_NUMBER_SIZE + VERSION_2_DICTIONARY_VERSION_SIZE); VERSION_2_HEADER_MAGIC_NUMBER_SIZE + VERSION_2_HEADER_DICTIONARY_VERSION_SIZE); default: return NO_FLAGS; } Loading @@ -73,11 +73,15 @@ const BinaryDictionaryHeaderReadingUtils::DictionaryFlags /* static */ bool BinaryDictionaryHeaderReadingUtils::readHeaderValue( const BinaryDictionaryInfo *const binaryDictionaryInfo, const char *const key, int *outValue, const int outValueSize) { if (outValueSize <= 0 || !hasHeaderAttributes(binaryDictionaryInfo->getFormat())) { if (outValueSize <= 0) { return false; } const int headerSize = getHeaderSize(binaryDictionaryInfo); int pos = getHeaderOptionsPosition(binaryDictionaryInfo->getFormat()); if (pos == NOT_A_DICT_POS) { // The header doesn't have header options. return false; } while (pos < headerSize) { if(ByteArrayUtils::compareStringInBufferWithCharArray( binaryDictionaryInfo->getDictBuf(), key, headerSize - pos, &pos) == 0) { Loading
native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.h +27 −22 Original line number Diff line number Diff line Loading @@ -48,27 +48,15 @@ class BinaryDictionaryHeaderReadingUtils { return (flags & FRENCH_LIGATURE_PROCESSING_FLAG) != 0; } static AK_FORCE_INLINE bool hasHeaderAttributes( const BinaryDictionaryFormatUtils::FORMAT_VERSION format) { // Only format 2 and above have header attributes as {key,value} string pairs. switch (format) { case BinaryDictionaryFormatUtils::VERSION_2: return true; break; default: return false; } } static AK_FORCE_INLINE int getHeaderOptionsPosition( const BinaryDictionaryFormatUtils::FORMAT_VERSION format) { switch (format) { case BinaryDictionaryFormatUtils::VERSION_2: return VERSION_2_MAGIC_NUMBER_SIZE + VERSION_2_DICTIONARY_VERSION_SIZE + VERSION_2_DICTIONARY_FLAG_SIZE + VERSION_2_DICTIONARY_HEADER_SIZE_SIZE; const BinaryDictionaryFormatUtils::FORMAT_VERSION dictionaryFormat) { switch (getHeaderVersion(dictionaryFormat)) { case HEADER_VERSION_2: return VERSION_2_HEADER_MAGIC_NUMBER_SIZE + VERSION_2_HEADER_DICTIONARY_VERSION_SIZE + VERSION_2_HEADER_FLAG_SIZE + VERSION_2_HEADER_SIZE_FIELD_SIZE; break; default: return 0; return NOT_A_DICT_POS; } } Loading @@ -82,10 +70,15 @@ class BinaryDictionaryHeaderReadingUtils { private: DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryHeaderReadingUtils); static const int VERSION_2_MAGIC_NUMBER_SIZE; static const int VERSION_2_DICTIONARY_VERSION_SIZE; static const int VERSION_2_DICTIONARY_FLAG_SIZE; static const int VERSION_2_DICTIONARY_HEADER_SIZE_SIZE; enum HEADER_VERSION { HEADER_VERSION_2, UNKNOWN_HEADER_VERSION }; static const int VERSION_2_HEADER_MAGIC_NUMBER_SIZE; static const int VERSION_2_HEADER_DICTIONARY_VERSION_SIZE; static const int VERSION_2_HEADER_FLAG_SIZE; static const int VERSION_2_HEADER_SIZE_FIELD_SIZE; static const DictionaryFlags NO_FLAGS; // Flags for special processing Loading @@ -95,6 +88,18 @@ class BinaryDictionaryHeaderReadingUtils { static const DictionaryFlags SUPPORTS_DYNAMIC_UPDATE_FLAG; static const DictionaryFlags FRENCH_LIGATURE_PROCESSING_FLAG; static const DictionaryFlags CONTAINS_BIGRAMS_FLAG; static HEADER_VERSION getHeaderVersion( const BinaryDictionaryFormatUtils::FORMAT_VERSION formatVersion) { switch(formatVersion) { case BinaryDictionaryFormatUtils::VERSION_2: // Fall through case BinaryDictionaryFormatUtils::VERSION_3: return HEADER_VERSION_2; default: return UNKNOWN_HEADER_VERSION; } } }; } #endif /* LATINIME_DICTIONARY_HEADER_READING_UTILS_H */
native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h +3 −0 Original line number Diff line number Diff line Loading @@ -32,6 +32,9 @@ class DictionaryStructurePolicyFactory { switch (dictionaryFormat) { case BinaryDictionaryFormatUtils::VERSION_2: return PatriciaTriePolicy::getInstance(); case BinaryDictionaryFormatUtils::VERSION_3: // TODO: support version 3 dictionaries. return 0; default: ASSERT(false); return 0; Loading