Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit fb2bde5a authored by Akifumi Yoshimoto's avatar Akifumi Yoshimoto
Browse files

Native side reads character table

Bug:17097992
Change-Id: Ibcfc67833a6754fe6a2d82a3e3023b33886b9ea2
parent eddfe51b
Loading
Loading
Loading
Loading
+15 −4
Original line number Diff line number Diff line
@@ -65,7 +65,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
              mMaxUnigramCount(HeaderReadWriteUtils::readIntAttributeValue(
                      &mAttributeMap, MAX_UNIGRAM_COUNT_KEY, DEFAULT_MAX_UNIGRAM_COUNT)),
              mMaxBigramCount(HeaderReadWriteUtils::readIntAttributeValue(
                      &mAttributeMap, MAX_BIGRAM_COUNT_KEY, DEFAULT_MAX_BIGRAM_COUNT)) {}
                      &mAttributeMap, MAX_BIGRAM_COUNT_KEY, DEFAULT_MAX_BIGRAM_COUNT)),
              mCodePointTable(HeaderReadWriteUtils::readCodePointTable(&mAttributeMap)) {}

    // Constructs header information using an attribute map.
    HeaderPolicy(const FormatUtils::FORMAT_VERSION dictFormatVersion,
@@ -97,7 +98,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
              mMaxUnigramCount(HeaderReadWriteUtils::readIntAttributeValue(
                      &mAttributeMap, MAX_UNIGRAM_COUNT_KEY, DEFAULT_MAX_UNIGRAM_COUNT)),
              mMaxBigramCount(HeaderReadWriteUtils::readIntAttributeValue(
                      &mAttributeMap, MAX_BIGRAM_COUNT_KEY, DEFAULT_MAX_BIGRAM_COUNT)) {}
                      &mAttributeMap, MAX_BIGRAM_COUNT_KEY, DEFAULT_MAX_BIGRAM_COUNT)),
              mCodePointTable(HeaderReadWriteUtils::readCodePointTable(&mAttributeMap)) {}

    // Copy header information
    HeaderPolicy(const HeaderPolicy *const headerPolicy)
@@ -118,7 +120,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
              mForgettingCurveDurationToLevelDown(
                      headerPolicy->mForgettingCurveDurationToLevelDown),
              mMaxUnigramCount(headerPolicy->mMaxUnigramCount),
              mMaxBigramCount(headerPolicy->mMaxBigramCount) {}
              mMaxBigramCount(headerPolicy->mMaxBigramCount),
              mCodePointTable(headerPolicy->mCodePointTable) {}

    // Temporary dummy header.
    HeaderPolicy()
@@ -128,7 +131,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
              mDate(0), mLastDecayedTime(0), mUnigramCount(0), mBigramCount(0),
              mExtendedRegionSize(0), mHasHistoricalInfoOfWords(false),
              mForgettingCurveOccurrencesToLevelUp(0), mForgettingCurveProbabilityValuesTableId(0),
              mForgettingCurveDurationToLevelDown(0), mMaxUnigramCount(0), mMaxBigramCount(0) {}
              mForgettingCurveDurationToLevelDown(0), mMaxUnigramCount(0), mMaxBigramCount(0),
              mCodePointTable(nullptr) {}

    ~HeaderPolicy() {}

@@ -139,6 +143,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
        switch (mDictFormatVersion) {
            case FormatUtils::VERSION_2:
                return FormatUtils::VERSION_2;
            case FormatUtils::VERSION_201:
                return FormatUtils::VERSION_201;
            case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
                return FormatUtils::VERSION_4_ONLY_FOR_TESTING;
            case FormatUtils::VERSION_4:
@@ -250,6 +256,10 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
        return mDictFormatVersion >= FormatUtils::VERSION_4;
    }

    // Returns the code point table pointer held by this header policy.
    // The result is nullptr when the header has no code point table attribute
    // or when this is the temporary dummy header.
    const int *getCodePointTable() const {
        return mCodePointTable;
    }

 private:
    DISALLOW_COPY_AND_ASSIGN(HeaderPolicy);

@@ -295,6 +305,7 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
    const int mForgettingCurveDurationToLevelDown;
    const int mMaxUnigramCount;
    const int mMaxBigramCount;
    // Non-owning pointer (the destructor does not release it). It is either the result of
    // HeaderReadWriteUtils::readCodePointTable(&mAttributeMap), a copy of another
    // HeaderPolicy's pointer, or nullptr for the temporary dummy header.
    // NOTE(review): when copied from another HeaderPolicy this appears to keep pointing at
    // that object's attribute storage -- confirm the source outlives the copy.
    const int *const mCodePointTable;

    const std::vector<int> readLocale() const;
    float readMultipleWordCostMultiplier() const;
+21 −6
Original line number Diff line number Diff line
@@ -18,6 +18,7 @@

#include <cctype>
#include <cstdio>
#include <memory>
#include <vector>

#include "defines.h"
@@ -34,12 +35,13 @@ namespace latinime {
const int HeaderReadWriteUtils::LARGEST_INT_DIGIT_COUNT = 11;

const int HeaderReadWriteUtils::MAX_ATTRIBUTE_KEY_LENGTH = 256;
const int HeaderReadWriteUtils::MAX_ATTRIBUTE_VALUE_LENGTH = 256;
const int HeaderReadWriteUtils::MAX_ATTRIBUTE_VALUE_LENGTH = 2048;

const int HeaderReadWriteUtils::HEADER_MAGIC_NUMBER_SIZE = 4;
const int HeaderReadWriteUtils::HEADER_DICTIONARY_VERSION_SIZE = 2;
const int HeaderReadWriteUtils::HEADER_FLAG_SIZE = 2;
const int HeaderReadWriteUtils::HEADER_SIZE_FIELD_SIZE = 4;
const char *const HeaderReadWriteUtils::CODE_POINT_TABLE_KEY = "codePointTable";

const HeaderReadWriteUtils::DictionaryFlags HeaderReadWriteUtils::NO_FLAGS = 0;

@@ -73,20 +75,32 @@ typedef DictionaryHeaderStructurePolicy::AttributeMap AttributeMap;
        return;
    }
    int keyBuffer[MAX_ATTRIBUTE_KEY_LENGTH];
    int valueBuffer[MAX_ATTRIBUTE_VALUE_LENGTH];
    std::unique_ptr<int[]> valueBuffer(new int[MAX_ATTRIBUTE_VALUE_LENGTH]);
    while (pos < headerSize) {
        // The values in the header don't use the code point table for their encoding.
        const int keyLength = ByteArrayUtils::readStringAndAdvancePosition(dictBuf,
                MAX_ATTRIBUTE_KEY_LENGTH, keyBuffer, &pos);
                MAX_ATTRIBUTE_KEY_LENGTH, nullptr /* codePointTable */, keyBuffer, &pos);
        std::vector<int> key;
        key.insert(key.end(), keyBuffer, keyBuffer + keyLength);
        const int valueLength = ByteArrayUtils::readStringAndAdvancePosition(dictBuf,
                MAX_ATTRIBUTE_VALUE_LENGTH, valueBuffer, &pos);
                MAX_ATTRIBUTE_VALUE_LENGTH, nullptr /* codePointTable */, valueBuffer.get(), &pos);
        std::vector<int> value;
        value.insert(value.end(), valueBuffer, valueBuffer + valueLength);
        value.insert(value.end(), valueBuffer.get(), valueBuffer.get() + valueLength);
        headerAttributes->insert(AttributeMap::value_type(key, value));
    }
}

// Looks up the code point table attribute in the given header attribute map.
// Returns a pointer to the elements of the stored value, or nullptr when the map has no entry
// for CODE_POINT_TABLE_KEY. The pointer refers to storage held by the map entry; no length is
// reported to the caller.
/* static */ const int *HeaderReadWriteUtils::readCodePointTable(
        AttributeMap *const headerAttributes) {
    // Build the map key from the attribute name.
    AttributeMap::key_type codePointTableKey;
    insertCharactersIntoVector(CODE_POINT_TABLE_KEY, &codePointTableKey);
    const AttributeMap::const_iterator entry = headerAttributes->find(codePointTableKey);
    return (entry != headerAttributes->end()) ? entry->second.data() : nullptr;
}

/* static */ bool HeaderReadWriteUtils::writeDictionaryVersion(
        BufferWithExtendableBuffer *const buffer, const FormatUtils::FORMAT_VERSION version,
        int *const writingPos) {
@@ -96,7 +110,8 @@ typedef DictionaryHeaderStructurePolicy::AttributeMap AttributeMap;
    }
    switch (version) {
        case FormatUtils::VERSION_2:
            // Version 2 dictionary writing is not supported.
        case FormatUtils::VERSION_201:
            // Version 2 or 201 dictionary writing is not supported.
            return false;
        case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
        case FormatUtils::VERSION_4:
+5 −0
Original line number Diff line number Diff line
@@ -46,6 +46,9 @@ class HeaderReadWriteUtils {
    static void fetchAllHeaderAttributes(const uint8_t *const dictBuf,
            DictionaryHeaderStructurePolicy::AttributeMap *const headerAttributes);

    // Looks up the CODE_POINT_TABLE_KEY attribute in headerAttributes.
    // Returns a pointer to the elements of the stored attribute value, or nullptr when the
    // attribute is absent. The pointer refers to storage held by the map entry, and the
    // number of elements is not returned.
    static const int *readCodePointTable(
            DictionaryHeaderStructurePolicy::AttributeMap *const headerAttributes);

    static bool writeDictionaryVersion(BufferWithExtendableBuffer *const buffer,
            const FormatUtils::FORMAT_VERSION version, int *const writingPos);

@@ -101,6 +104,8 @@ class HeaderReadWriteUtils {
    static const int HEADER_FLAG_SIZE;
    static const int HEADER_SIZE_FIELD_SIZE;

    static const char *const CODE_POINT_TABLE_KEY;

    // Value for the "flags" field. It's unused at the moment.
    static const DictionaryFlags NO_FLAGS;

+4 −3
Original line number Diff line number Diff line
@@ -23,6 +23,7 @@

#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.h"

#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.h"
@@ -59,8 +60,8 @@ const PtNodeParams Ver4PatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProce
    const int parentPos =
            DynamicPtReadingUtils::getParentPtNodePos(parentPosOffset, headPos);
    int codePoints[MAX_WORD_LENGTH];
    const int codePonitCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
            dictBuf, flags, MAX_WORD_LENGTH, codePoints, &pos);
    const int codePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
            dictBuf, flags, MAX_WORD_LENGTH, mHeaderPolicy->getCodePointTable(), codePoints, &pos);
    int terminalIdFieldPos = NOT_A_DICT_POS;
    int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
    int probability = NOT_A_PROBABILITY;
@@ -98,7 +99,7 @@ const PtNodeParams Ver4PatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProce
        // The destination position is stored at the same place as the parent position.
        return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(parentPos, newSiblingNodePos);
    } else {
        return PtNodeParams(headPos, flags, parentPos, codePonitCount, codePoints,
        return PtNodeParams(headPos, flags, parentPos, codePointCount, codePoints,
                terminalIdFieldPos, terminalId, probability, childrenPosFieldPos, childrenPos,
                newSiblingNodePos);
    }
+3 −1
Original line number Diff line number Diff line
@@ -114,7 +114,8 @@ template<class DictConstants, class DictBuffers, class DictBuffersPtr, class Str
            mmappedBuffer->getReadOnlyByteArrayView());
    switch (formatVersion) {
        case FormatUtils::VERSION_2:
            AKLOGE("Given path is a directory but the format is version 2. path: %s", path);
        case FormatUtils::VERSION_201:
            AKLOGE("Given path is a directory but the format is version 2 or 201. path: %s", path);
            break;
        case FormatUtils::VERSION_4: {
            return newPolicyForV4Dict<backward::v402::Ver4DictConstants,
@@ -175,6 +176,7 @@ template<class DictConstants, class DictBuffers, class DictBuffersPtr, class Str
    }
    switch (FormatUtils::detectFormatVersion(mmappedBuffer->getReadOnlyByteArrayView())) {
        case FormatUtils::VERSION_2:
        case FormatUtils::VERSION_201:
            return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(
                    new PatriciaTriePolicy(std::move(mmappedBuffer)));
        case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
Loading