Loading native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp +76 −10 Original line number Original line Diff line number Diff line Loading @@ -16,24 +16,90 @@ #include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/header/header_policy.h" #include <cstddef> namespace latinime { namespace latinime { const char *const HeaderPolicy::MULTIPLE_WORDS_DEMOTION_RATE_KEY = const char *const HeaderPolicy::MULTIPLE_WORDS_DEMOTION_RATE_KEY = "MULTIPLE_WORDS_DEMOTION_RATE"; "MULTIPLE_WORDS_DEMOTION_RATE"; const float HeaderPolicy::DEFAULT_MULTIPLE_WORD_COST_MULTIPLIER = 1.0f; const float HeaderPolicy::DEFAULT_MULTI_WORD_COST_MULTIPLIER = 1.0f; const float HeaderPolicy::MULTIPLE_WORD_COST_MULTIPLIER_SCALE = 100.0f; const float HeaderPolicy::MULTI_WORD_COST_MULTIPLIER_SCALE = 100.0f; float HeaderPolicy::readMultiWordCostMultiplier() const { // Used for logging. Question mark is used to indicate that the key is not found. const int headerValue = HeaderReadingUtils::readHeaderValueInt( void HeaderPolicy::readHeaderValueOrQuestionMark(const char *const key, int *outValue, mDictBuf, MULTIPLE_WORDS_DEMOTION_RATE_KEY); int outValueSize) const { if (outValueSize <= 0) return; if (outValueSize == 1) { outValue[0] = '\0'; return; } std::vector<int> keyCodePointVector; insertCharactersIntoVector(key, &keyCodePointVector); HeaderReadingUtils::AttributeMap::const_iterator it = mAttributeMap.find(keyCodePointVector); if (it == mAttributeMap.end()) { // The key was not found. outValue[0] = '?'; outValue[1] = '\0'; return; } const int terminalIndex = min(static_cast<int>(it->second.size()), outValueSize - 1); for (int i = 0; i < terminalIndex; ++i) { outValue[i] = it->second[i]; } outValue[terminalIndex] = '\0'; } float HeaderPolicy::readMultipleWordCostMultiplier() const { std::vector<int> multipleWordsDemotionRateKeyVector; insertCharactersIntoVector(MULTIPLE_WORDS_DEMOTION_RATE_KEY, &multipleWordsDemotionRateKeyVector); HeaderReadingUtils::AttributeMap::const_iterator it = mAttributeMap.find(multipleWordsDemotionRateKeyVector); if (it == mAttributeMap.end()) { // The key was not found. return DEFAULT_MULTIPLE_WORD_COST_MULTIPLIER; } const int headerValue = parseIntAttributeValue(&(it->second)); if (headerValue == S_INT_MIN) { if (headerValue == S_INT_MIN) { // not found // Invalid value return DEFAULT_MULTI_WORD_COST_MULTIPLIER; return DEFAULT_MULTIPLE_WORD_COST_MULTIPLIER; } } if (headerValue <= 0) { if (headerValue <= 0) { return static_cast<float>(MAX_VALUE_FOR_WEIGHTING); return static_cast<float>(MAX_VALUE_FOR_WEIGHTING); } } return MULTI_WORD_COST_MULTIPLIER_SCALE / static_cast<float>(headerValue); return MULTIPLE_WORD_COST_MULTIPLIER_SCALE / static_cast<float>(headerValue); } /* static */ HeaderReadingUtils::AttributeMap HeaderPolicy::createAttributeMapAndReadAllAttributes( const uint8_t *const dictBuf) { HeaderReadingUtils::AttributeMap attributeMap; HeaderReadingUtils::fetchAllHeaderAttributes(dictBuf, &attributeMap); return attributeMap; } /* static */ int HeaderPolicy::parseIntAttributeValue( const std::vector<int> *const attributeValue) { int value = 0; bool isNegative = false; for (size_t i = 0; i < attributeValue->size(); ++i) { if (i == 0 && attributeValue->at(i) == '-') { isNegative = true; } else { if (!isdigit(attributeValue->at(i))) { // If not a number, return S_INT_MIN return S_INT_MIN; } value *= 10; value += attributeValue->at(i) - '0'; } } return isNegative ? -value : value; } /* static */ void HeaderPolicy::insertCharactersIntoVector(const char *const characters, std::vector<int> *const vector) { for (int i = 0; characters[i]; ++i) { vector->push_back(characters[i]); } } } } // namespace latinime } // namespace latinime native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h +17 −18 Original line number Original line Diff line number Diff line Loading @@ -17,6 +17,7 @@ #ifndef LATINIME_HEADER_POLICY_H #ifndef LATINIME_HEADER_POLICY_H #define LATINIME_HEADER_POLICY_H #define LATINIME_HEADER_POLICY_H #include <cctype> #include <stdint.h> #include <stdint.h> #include "defines.h" #include "defines.h" Loading @@ -30,7 +31,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { explicit HeaderPolicy(const uint8_t *const dictBuf) explicit HeaderPolicy(const uint8_t *const dictBuf) : mDictBuf(dictBuf), mDictionaryFlags(HeaderReadingUtils::getFlags(dictBuf)), : mDictBuf(dictBuf), mDictionaryFlags(HeaderReadingUtils::getFlags(dictBuf)), mSize(HeaderReadingUtils::getHeaderSize(dictBuf)), mSize(HeaderReadingUtils::getHeaderSize(dictBuf)), mMultiWordCostMultiplier(readMultiWordCostMultiplier()) {} mAttributeMap(createAttributeMapAndReadAllAttributes(mDictBuf)), mMultiWordCostMultiplier(readMultipleWordCostMultiplier()) {} ~HeaderPolicy() {} ~HeaderPolicy() {} Loading @@ -55,34 +57,31 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { return mMultiWordCostMultiplier; return mMultiWordCostMultiplier; } } AK_FORCE_INLINE void readHeaderValueOrQuestionMark(const char *const key, void readHeaderValueOrQuestionMark(const char *const key, int *outValue, int outValueSize) const { int *outValue, int outValueSize) const; if (outValueSize <= 0) return; if (outValueSize == 1) { outValue[0] = '\0'; return; } if (!HeaderReadingUtils::readHeaderValue(mDictBuf, key, outValue, outValueSize)) { outValue[0] = '?'; outValue[1] = '\0'; } } private: private: DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderPolicy); DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderPolicy); static const char *const MULTIPLE_WORDS_DEMOTION_RATE_KEY; static const char *const MULTIPLE_WORDS_DEMOTION_RATE_KEY; static const float DEFAULT_MULTI_WORD_COST_MULTIPLIER; static const float DEFAULT_MULTIPLE_WORD_COST_MULTIPLIER; static const float MULTI_WORD_COST_MULTIPLIER_SCALE; static const float MULTIPLE_WORD_COST_MULTIPLIER_SCALE; const uint8_t *const mDictBuf; const uint8_t *const mDictBuf; const HeaderReadingUtils::DictionaryFlags mDictionaryFlags; const HeaderReadingUtils::DictionaryFlags mDictionaryFlags; const int mSize; const int mSize; HeaderReadingUtils::AttributeMap mAttributeMap; const float mMultiWordCostMultiplier; const float mMultiWordCostMultiplier; float readMultiWordCostMultiplier() const; float readMultipleWordCostMultiplier() const; }; static HeaderReadingUtils::AttributeMap createAttributeMapAndReadAllAttributes( const uint8_t *const dictBuf); static int parseIntAttributeValue(const std::vector<int> *const attributeValue); static void insertCharactersIntoVector( const char *const characters, std::vector<int> *const vector); }; } // namespace latinime } // namespace latinime #endif /* LATINIME_HEADER_POLICY_H */ #endif /* LATINIME_HEADER_POLICY_H */ native/jni/src/suggest/policyimpl/dictionary/header/header_reading_utils.cpp +18 −45 Original line number Original line Diff line number Diff line Loading @@ -16,23 +16,22 @@ #include "suggest/policyimpl/dictionary/header/header_reading_utils.h" #include "suggest/policyimpl/dictionary/header/header_reading_utils.h" #include <cctype> #include <vector> #include <cstdlib> #include "defines.h" #include "defines.h" #include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" #include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" namespace latinime { namespace latinime { const int HeaderReadingUtils::MAX_OPTION_KEY_LENGTH = 256; const int HeaderReadingUtils::MAX_ATTRIBUTE_KEY_LENGTH = 256; const int HeaderReadingUtils::MAX_ATTRIBUTE_VALUE_LENGTH = 256; const int HeaderReadingUtils::HEADER_MAGIC_NUMBER_SIZE = 4; const int HeaderReadingUtils::HEADER_MAGIC_NUMBER_SIZE = 4; const int HeaderReadingUtils::HEADER_DICTIONARY_VERSION_SIZE = 2; const int HeaderReadingUtils::HEADER_DICTIONARY_VERSION_SIZE = 2; const int HeaderReadingUtils::HEADER_FLAG_SIZE = 2; const int HeaderReadingUtils::HEADER_FLAG_SIZE = 2; const int HeaderReadingUtils::HEADER_SIZE_FIELD_SIZE = 4; const int HeaderReadingUtils::HEADER_SIZE_FIELD_SIZE = 4; const HeaderReadingUtils::DictionaryFlags const HeaderReadingUtils::DictionaryFlags HeaderReadingUtils::NO_FLAGS = 0; HeaderReadingUtils::NO_FLAGS = 0; // Flags for special processing // Flags for special processing // Those *must* match the flags in makedict (FormatSpec#*_PROCESSING_FLAG) or // Those *must* match the flags in makedict (FormatSpec#*_PROCESSING_FLAG) or // something very bad (like, the apocalypse) will happen. Please update both at the same time. // something very bad (like, the apocalypse) will happen. Please update both at the same time. Loading @@ -56,53 +55,27 @@ const HeaderReadingUtils::DictionaryFlags HEADER_MAGIC_NUMBER_SIZE + HEADER_DICTIONARY_VERSION_SIZE); HEADER_MAGIC_NUMBER_SIZE + HEADER_DICTIONARY_VERSION_SIZE); } } // Returns if the key is found or not and reads the found value into outValue. /* static */ void HeaderReadingUtils::fetchAllHeaderAttributes(const uint8_t *const dictBuf, /* static */ bool HeaderReadingUtils::readHeaderValue(const uint8_t *const dictBuf, AttributeMap *const headerAttributes) { const char *const key, int *outValue, const int outValueSize) { if (outValueSize <= 0) { return false; } const int headerSize = getHeaderSize(dictBuf); const int headerSize = getHeaderSize(dictBuf); int pos = getHeaderOptionsPosition(); int pos = getHeaderOptionsPosition(); if (pos == NOT_A_DICT_POS) { if (pos == NOT_A_DICT_POS) { // The header doesn't have header options. // The header doesn't have header options. return false; return; } } int keyBuffer[MAX_ATTRIBUTE_KEY_LENGTH]; int valueBuffer[MAX_ATTRIBUTE_VALUE_LENGTH]; while (pos < headerSize) { while (pos < headerSize) { if(ByteArrayUtils::compareStringInBufferWithCharArray( const int keyLength = ByteArrayUtils::readStringAndAdvancePosition(dictBuf, dictBuf, key, headerSize - pos, &pos) == 0) { MAX_ATTRIBUTE_KEY_LENGTH, keyBuffer, &pos); // The key was found. std::vector<int> key; const int length = ByteArrayUtils::readStringAndAdvancePosition(dictBuf, outValueSize, key.insert(key.end(), keyBuffer, keyBuffer + keyLength); outValue, &pos); const int valueLength = ByteArrayUtils::readStringAndAdvancePosition(dictBuf, // Add a 0 terminator to the string. MAX_ATTRIBUTE_VALUE_LENGTH, valueBuffer, &pos); outValue[length < outValueSize ? length : outValueSize - 1] = '\0'; std::vector<int> value; return true; value.insert(value.end(), valueBuffer, valueBuffer + valueLength); } headerAttributes->insert(AttributeMap::value_type(key, value)); ByteArrayUtils::advancePositionToBehindString(dictBuf, headerSize - pos, &pos); } // The key was not found. return false; } /* static */ int HeaderReadingUtils::readHeaderValueInt( const uint8_t *const dictBuf, const char *const key) { const int bufferSize = LARGEST_INT_DIGIT_COUNT; int intBuffer[bufferSize]; char charBuffer[bufferSize]; if (!readHeaderValue(dictBuf, key, intBuffer, bufferSize)) { return S_INT_MIN; } for (int i = 0; i < bufferSize; ++i) { charBuffer[i] = intBuffer[i]; if (charBuffer[i] == '0') { break; } if (!isdigit(charBuffer[i])) { // If not a number, return S_INT_MIN return S_INT_MIN; } } } return atoi(charBuffer); } } } // namespace latinime } // namespace latinime native/jni/src/suggest/policyimpl/dictionary/header/header_reading_utils.h +8 −6 Original line number Original line Diff line number Diff line Loading @@ -17,7 +17,9 @@ #ifndef LATINIME_HEADER_READING_UTILS_H #ifndef LATINIME_HEADER_READING_UTILS_H #define LATINIME_HEADER_READING_UTILS_H #define LATINIME_HEADER_READING_UTILS_H #include <map> #include <stdint.h> #include <stdint.h> #include <vector> #include "defines.h" #include "defines.h" Loading @@ -26,8 +28,7 @@ namespace latinime { class HeaderReadingUtils { class HeaderReadingUtils { public: public: typedef uint16_t DictionaryFlags; typedef uint16_t DictionaryFlags; typedef std::map<std::vector<int>, std::vector<int> > AttributeMap; static const int MAX_OPTION_KEY_LENGTH; static int getHeaderSize(const uint8_t *const dictBuf); static int getHeaderSize(const uint8_t *const dictBuf); Loading @@ -50,14 +51,15 @@ class HeaderReadingUtils { + HEADER_SIZE_FIELD_SIZE; + HEADER_SIZE_FIELD_SIZE; } } static bool readHeaderValue(const uint8_t *const dictBuf, static void fetchAllHeaderAttributes(const uint8_t *const dictBuf, const char *const key, int *outValue, const int outValueSize); AttributeMap *const headerAttributes); static int readHeaderValueInt(const uint8_t *const dictBuf, const char *const key); private: private: DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderReadingUtils); DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderReadingUtils); static const int MAX_ATTRIBUTE_KEY_LENGTH; static const int MAX_ATTRIBUTE_VALUE_LENGTH; static const int HEADER_MAGIC_NUMBER_SIZE; static const int HEADER_MAGIC_NUMBER_SIZE; static const int HEADER_DICTIONARY_VERSION_SIZE; static const int HEADER_DICTIONARY_VERSION_SIZE; static const int HEADER_FLAG_SIZE; static const int HEADER_FLAG_SIZE; Loading native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h +0 −33 Original line number Original line Diff line number Diff line Loading @@ -176,39 +176,6 @@ class ByteArrayUtils { return length; return length; } } // Returns an integer less than, equal to, or greater than zero when string starting from pos // in buffer is less than, match, or is greater than charArray. static AK_FORCE_INLINE int compareStringInBufferWithCharArray(const uint8_t *const buffer, const char *const charArray, const int maxLength, int *const pos) { int index = 0; int codePoint = readCodePointAndAdvancePosition(buffer, pos); const uint8_t *const uint8CharArrayForComparison = reinterpret_cast<const uint8_t *>(charArray); while (NOT_A_CODE_POINT != codePoint && '\0' != uint8CharArrayForComparison[index] && index < maxLength) { if (codePoint != uint8CharArrayForComparison[index]) { // Different character is found. // Skip the rest of the string in the buffer. advancePositionToBehindString(buffer, maxLength - index, pos); return codePoint - uint8CharArrayForComparison[index]; } // Advance codePoint = readCodePointAndAdvancePosition(buffer, pos); ++index; } if (NOT_A_CODE_POINT != codePoint && index < maxLength) { // Skip the rest of the string in the buffer. advancePositionToBehindString(buffer, maxLength - index, pos); } if (NOT_A_CODE_POINT == codePoint && '\0' == uint8CharArrayForComparison[index]) { // When both of the last characters are terminals, we consider the string in the buffer // matches the given char array return 0; } else { return codePoint - uint8CharArrayForComparison[index]; } } private: private: DISALLOW_IMPLICIT_CONSTRUCTORS(ByteArrayUtils); DISALLOW_IMPLICIT_CONSTRUCTORS(ByteArrayUtils); Loading Loading
native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp +76 −10 Original line number Original line Diff line number Diff line Loading @@ -16,24 +16,90 @@ #include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/header/header_policy.h" #include <cstddef> namespace latinime { namespace latinime { const char *const HeaderPolicy::MULTIPLE_WORDS_DEMOTION_RATE_KEY = const char *const HeaderPolicy::MULTIPLE_WORDS_DEMOTION_RATE_KEY = "MULTIPLE_WORDS_DEMOTION_RATE"; "MULTIPLE_WORDS_DEMOTION_RATE"; const float HeaderPolicy::DEFAULT_MULTIPLE_WORD_COST_MULTIPLIER = 1.0f; const float HeaderPolicy::DEFAULT_MULTI_WORD_COST_MULTIPLIER = 1.0f; const float HeaderPolicy::MULTIPLE_WORD_COST_MULTIPLIER_SCALE = 100.0f; const float HeaderPolicy::MULTI_WORD_COST_MULTIPLIER_SCALE = 100.0f; float HeaderPolicy::readMultiWordCostMultiplier() const { // Used for logging. Question mark is used to indicate that the key is not found. const int headerValue = HeaderReadingUtils::readHeaderValueInt( void HeaderPolicy::readHeaderValueOrQuestionMark(const char *const key, int *outValue, mDictBuf, MULTIPLE_WORDS_DEMOTION_RATE_KEY); int outValueSize) const { if (outValueSize <= 0) return; if (outValueSize == 1) { outValue[0] = '\0'; return; } std::vector<int> keyCodePointVector; insertCharactersIntoVector(key, &keyCodePointVector); HeaderReadingUtils::AttributeMap::const_iterator it = mAttributeMap.find(keyCodePointVector); if (it == mAttributeMap.end()) { // The key was not found. outValue[0] = '?'; outValue[1] = '\0'; return; } const int terminalIndex = min(static_cast<int>(it->second.size()), outValueSize - 1); for (int i = 0; i < terminalIndex; ++i) { outValue[i] = it->second[i]; } outValue[terminalIndex] = '\0'; } float HeaderPolicy::readMultipleWordCostMultiplier() const { std::vector<int> multipleWordsDemotionRateKeyVector; insertCharactersIntoVector(MULTIPLE_WORDS_DEMOTION_RATE_KEY, &multipleWordsDemotionRateKeyVector); HeaderReadingUtils::AttributeMap::const_iterator it = mAttributeMap.find(multipleWordsDemotionRateKeyVector); if (it == mAttributeMap.end()) { // The key was not found. return DEFAULT_MULTIPLE_WORD_COST_MULTIPLIER; } const int headerValue = parseIntAttributeValue(&(it->second)); if (headerValue == S_INT_MIN) { if (headerValue == S_INT_MIN) { // not found // Invalid value return DEFAULT_MULTI_WORD_COST_MULTIPLIER; return DEFAULT_MULTIPLE_WORD_COST_MULTIPLIER; } } if (headerValue <= 0) { if (headerValue <= 0) { return static_cast<float>(MAX_VALUE_FOR_WEIGHTING); return static_cast<float>(MAX_VALUE_FOR_WEIGHTING); } } return MULTI_WORD_COST_MULTIPLIER_SCALE / static_cast<float>(headerValue); return MULTIPLE_WORD_COST_MULTIPLIER_SCALE / static_cast<float>(headerValue); } /* static */ HeaderReadingUtils::AttributeMap HeaderPolicy::createAttributeMapAndReadAllAttributes( const uint8_t *const dictBuf) { HeaderReadingUtils::AttributeMap attributeMap; HeaderReadingUtils::fetchAllHeaderAttributes(dictBuf, &attributeMap); return attributeMap; } /* static */ int HeaderPolicy::parseIntAttributeValue( const std::vector<int> *const attributeValue) { int value = 0; bool isNegative = false; for (size_t i = 0; i < attributeValue->size(); ++i) { if (i == 0 && attributeValue->at(i) == '-') { isNegative = true; } else { if (!isdigit(attributeValue->at(i))) { // If not a number, return S_INT_MIN return S_INT_MIN; } value *= 10; value += attributeValue->at(i) - '0'; } } return isNegative ? -value : value; } /* static */ void HeaderPolicy::insertCharactersIntoVector(const char *const characters, std::vector<int> *const vector) { for (int i = 0; characters[i]; ++i) { vector->push_back(characters[i]); } } } } // namespace latinime } // namespace latinime
native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h +17 −18 Original line number Original line Diff line number Diff line Loading @@ -17,6 +17,7 @@ #ifndef LATINIME_HEADER_POLICY_H #ifndef LATINIME_HEADER_POLICY_H #define LATINIME_HEADER_POLICY_H #define LATINIME_HEADER_POLICY_H #include <cctype> #include <stdint.h> #include <stdint.h> #include "defines.h" #include "defines.h" Loading @@ -30,7 +31,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { explicit HeaderPolicy(const uint8_t *const dictBuf) explicit HeaderPolicy(const uint8_t *const dictBuf) : mDictBuf(dictBuf), mDictionaryFlags(HeaderReadingUtils::getFlags(dictBuf)), : mDictBuf(dictBuf), mDictionaryFlags(HeaderReadingUtils::getFlags(dictBuf)), mSize(HeaderReadingUtils::getHeaderSize(dictBuf)), mSize(HeaderReadingUtils::getHeaderSize(dictBuf)), mMultiWordCostMultiplier(readMultiWordCostMultiplier()) {} mAttributeMap(createAttributeMapAndReadAllAttributes(mDictBuf)), mMultiWordCostMultiplier(readMultipleWordCostMultiplier()) {} ~HeaderPolicy() {} ~HeaderPolicy() {} Loading @@ -55,34 +57,31 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { return mMultiWordCostMultiplier; return mMultiWordCostMultiplier; } } AK_FORCE_INLINE void readHeaderValueOrQuestionMark(const char *const key, void readHeaderValueOrQuestionMark(const char *const key, int *outValue, int outValueSize) const { int *outValue, int outValueSize) const; if (outValueSize <= 0) return; if (outValueSize == 1) { outValue[0] = '\0'; return; } if (!HeaderReadingUtils::readHeaderValue(mDictBuf, key, outValue, outValueSize)) { outValue[0] = '?'; outValue[1] = '\0'; } } private: private: DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderPolicy); DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderPolicy); static const char *const MULTIPLE_WORDS_DEMOTION_RATE_KEY; static const char *const MULTIPLE_WORDS_DEMOTION_RATE_KEY; static const float DEFAULT_MULTI_WORD_COST_MULTIPLIER; static const float DEFAULT_MULTIPLE_WORD_COST_MULTIPLIER; static const float MULTI_WORD_COST_MULTIPLIER_SCALE; static const float MULTIPLE_WORD_COST_MULTIPLIER_SCALE; const uint8_t *const mDictBuf; const uint8_t *const mDictBuf; const HeaderReadingUtils::DictionaryFlags mDictionaryFlags; const HeaderReadingUtils::DictionaryFlags mDictionaryFlags; const int mSize; const int mSize; HeaderReadingUtils::AttributeMap mAttributeMap; const float mMultiWordCostMultiplier; const float mMultiWordCostMultiplier; float readMultiWordCostMultiplier() const; float readMultipleWordCostMultiplier() const; }; static HeaderReadingUtils::AttributeMap createAttributeMapAndReadAllAttributes( const uint8_t *const dictBuf); static int parseIntAttributeValue(const std::vector<int> *const attributeValue); static void insertCharactersIntoVector( const char *const characters, std::vector<int> *const vector); }; } // namespace latinime } // namespace latinime #endif /* LATINIME_HEADER_POLICY_H */ #endif /* LATINIME_HEADER_POLICY_H */
native/jni/src/suggest/policyimpl/dictionary/header/header_reading_utils.cpp +18 −45 Original line number Original line Diff line number Diff line Loading @@ -16,23 +16,22 @@ #include "suggest/policyimpl/dictionary/header/header_reading_utils.h" #include "suggest/policyimpl/dictionary/header/header_reading_utils.h" #include <cctype> #include <vector> #include <cstdlib> #include "defines.h" #include "defines.h" #include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" #include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" namespace latinime { namespace latinime { const int HeaderReadingUtils::MAX_OPTION_KEY_LENGTH = 256; const int HeaderReadingUtils::MAX_ATTRIBUTE_KEY_LENGTH = 256; const int HeaderReadingUtils::MAX_ATTRIBUTE_VALUE_LENGTH = 256; const int HeaderReadingUtils::HEADER_MAGIC_NUMBER_SIZE = 4; const int HeaderReadingUtils::HEADER_MAGIC_NUMBER_SIZE = 4; const int HeaderReadingUtils::HEADER_DICTIONARY_VERSION_SIZE = 2; const int HeaderReadingUtils::HEADER_DICTIONARY_VERSION_SIZE = 2; const int HeaderReadingUtils::HEADER_FLAG_SIZE = 2; const int HeaderReadingUtils::HEADER_FLAG_SIZE = 2; const int HeaderReadingUtils::HEADER_SIZE_FIELD_SIZE = 4; const int HeaderReadingUtils::HEADER_SIZE_FIELD_SIZE = 4; const HeaderReadingUtils::DictionaryFlags const HeaderReadingUtils::DictionaryFlags HeaderReadingUtils::NO_FLAGS = 0; HeaderReadingUtils::NO_FLAGS = 0; // Flags for special processing // Flags for special processing // Those *must* match the flags in makedict (FormatSpec#*_PROCESSING_FLAG) or // Those *must* match the flags in makedict (FormatSpec#*_PROCESSING_FLAG) or // something very bad (like, the apocalypse) will happen. Please update both at the same time. // something very bad (like, the apocalypse) will happen. Please update both at the same time. Loading @@ -56,53 +55,27 @@ const HeaderReadingUtils::DictionaryFlags HEADER_MAGIC_NUMBER_SIZE + HEADER_DICTIONARY_VERSION_SIZE); HEADER_MAGIC_NUMBER_SIZE + HEADER_DICTIONARY_VERSION_SIZE); } } // Returns if the key is found or not and reads the found value into outValue. /* static */ void HeaderReadingUtils::fetchAllHeaderAttributes(const uint8_t *const dictBuf, /* static */ bool HeaderReadingUtils::readHeaderValue(const uint8_t *const dictBuf, AttributeMap *const headerAttributes) { const char *const key, int *outValue, const int outValueSize) { if (outValueSize <= 0) { return false; } const int headerSize = getHeaderSize(dictBuf); const int headerSize = getHeaderSize(dictBuf); int pos = getHeaderOptionsPosition(); int pos = getHeaderOptionsPosition(); if (pos == NOT_A_DICT_POS) { if (pos == NOT_A_DICT_POS) { // The header doesn't have header options. // The header doesn't have header options. return false; return; } } int keyBuffer[MAX_ATTRIBUTE_KEY_LENGTH]; int valueBuffer[MAX_ATTRIBUTE_VALUE_LENGTH]; while (pos < headerSize) { while (pos < headerSize) { if(ByteArrayUtils::compareStringInBufferWithCharArray( const int keyLength = ByteArrayUtils::readStringAndAdvancePosition(dictBuf, dictBuf, key, headerSize - pos, &pos) == 0) { MAX_ATTRIBUTE_KEY_LENGTH, keyBuffer, &pos); // The key was found. std::vector<int> key; const int length = ByteArrayUtils::readStringAndAdvancePosition(dictBuf, outValueSize, key.insert(key.end(), keyBuffer, keyBuffer + keyLength); outValue, &pos); const int valueLength = ByteArrayUtils::readStringAndAdvancePosition(dictBuf, // Add a 0 terminator to the string. MAX_ATTRIBUTE_VALUE_LENGTH, valueBuffer, &pos); outValue[length < outValueSize ? length : outValueSize - 1] = '\0'; std::vector<int> value; return true; value.insert(value.end(), valueBuffer, valueBuffer + valueLength); } headerAttributes->insert(AttributeMap::value_type(key, value)); ByteArrayUtils::advancePositionToBehindString(dictBuf, headerSize - pos, &pos); } // The key was not found. return false; } /* static */ int HeaderReadingUtils::readHeaderValueInt( const uint8_t *const dictBuf, const char *const key) { const int bufferSize = LARGEST_INT_DIGIT_COUNT; int intBuffer[bufferSize]; char charBuffer[bufferSize]; if (!readHeaderValue(dictBuf, key, intBuffer, bufferSize)) { return S_INT_MIN; } for (int i = 0; i < bufferSize; ++i) { charBuffer[i] = intBuffer[i]; if (charBuffer[i] == '0') { break; } if (!isdigit(charBuffer[i])) { // If not a number, return S_INT_MIN return S_INT_MIN; } } } return atoi(charBuffer); } } } // namespace latinime } // namespace latinime
native/jni/src/suggest/policyimpl/dictionary/header/header_reading_utils.h +8 −6 Original line number Original line Diff line number Diff line Loading @@ -17,7 +17,9 @@ #ifndef LATINIME_HEADER_READING_UTILS_H #ifndef LATINIME_HEADER_READING_UTILS_H #define LATINIME_HEADER_READING_UTILS_H #define LATINIME_HEADER_READING_UTILS_H #include <map> #include <stdint.h> #include <stdint.h> #include <vector> #include "defines.h" #include "defines.h" Loading @@ -26,8 +28,7 @@ namespace latinime { class HeaderReadingUtils { class HeaderReadingUtils { public: public: typedef uint16_t DictionaryFlags; typedef uint16_t DictionaryFlags; typedef std::map<std::vector<int>, std::vector<int> > AttributeMap; static const int MAX_OPTION_KEY_LENGTH; static int getHeaderSize(const uint8_t *const dictBuf); static int getHeaderSize(const uint8_t *const dictBuf); Loading @@ -50,14 +51,15 @@ class HeaderReadingUtils { + HEADER_SIZE_FIELD_SIZE; + HEADER_SIZE_FIELD_SIZE; } } static bool readHeaderValue(const uint8_t *const dictBuf, static void fetchAllHeaderAttributes(const uint8_t *const dictBuf, const char *const key, int *outValue, const int outValueSize); AttributeMap *const headerAttributes); static int readHeaderValueInt(const uint8_t *const dictBuf, const char *const key); private: private: DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderReadingUtils); DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderReadingUtils); static const int MAX_ATTRIBUTE_KEY_LENGTH; static const int MAX_ATTRIBUTE_VALUE_LENGTH; static const int HEADER_MAGIC_NUMBER_SIZE; static const int HEADER_MAGIC_NUMBER_SIZE; static const int HEADER_DICTIONARY_VERSION_SIZE; static const int HEADER_DICTIONARY_VERSION_SIZE; static const int HEADER_FLAG_SIZE; static const int HEADER_FLAG_SIZE; Loading
native/jni/src/suggest/policyimpl/dictionary/utils/byte_array_utils.h +0 −33 Original line number Original line Diff line number Diff line Loading @@ -176,39 +176,6 @@ class ByteArrayUtils { return length; return length; } } // Returns an integer less than, equal to, or greater than zero when string starting from pos // in buffer is less than, match, or is greater than charArray. static AK_FORCE_INLINE int compareStringInBufferWithCharArray(const uint8_t *const buffer, const char *const charArray, const int maxLength, int *const pos) { int index = 0; int codePoint = readCodePointAndAdvancePosition(buffer, pos); const uint8_t *const uint8CharArrayForComparison = reinterpret_cast<const uint8_t *>(charArray); while (NOT_A_CODE_POINT != codePoint && '\0' != uint8CharArrayForComparison[index] && index < maxLength) { if (codePoint != uint8CharArrayForComparison[index]) { // Different character is found. // Skip the rest of the string in the buffer. advancePositionToBehindString(buffer, maxLength - index, pos); return codePoint - uint8CharArrayForComparison[index]; } // Advance codePoint = readCodePointAndAdvancePosition(buffer, pos); ++index; } if (NOT_A_CODE_POINT != codePoint && index < maxLength) { // Skip the rest of the string in the buffer. advancePositionToBehindString(buffer, maxLength - index, pos); } if (NOT_A_CODE_POINT == codePoint && '\0' == uint8CharArrayForComparison[index]) { // When both of the last characters are terminals, we consider the string in the buffer // matches the given char array return 0; } else { return codePoint - uint8CharArrayForComparison[index]; } } private: private: DISALLOW_IMPLICIT_CONSTRUCTORS(ByteArrayUtils); DISALLOW_IMPLICIT_CONSTRUCTORS(ByteArrayUtils); Loading