Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 5c6db929 authored by Akifumi Yoshimoto's avatar Akifumi Yoshimoto Committed by Android (Google) Code Review
Browse files

Merge "Native side reads character table"

parents f14043c8 fb2bde5a
Loading
Loading
Loading
Loading
+15 −4
Original line number Diff line number Diff line
@@ -65,7 +65,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
              mMaxUnigramCount(HeaderReadWriteUtils::readIntAttributeValue(
                      &mAttributeMap, MAX_UNIGRAM_COUNT_KEY, DEFAULT_MAX_UNIGRAM_COUNT)),
              mMaxBigramCount(HeaderReadWriteUtils::readIntAttributeValue(
                      &mAttributeMap, MAX_BIGRAM_COUNT_KEY, DEFAULT_MAX_BIGRAM_COUNT)) {}
                      &mAttributeMap, MAX_BIGRAM_COUNT_KEY, DEFAULT_MAX_BIGRAM_COUNT)),
              mCodePointTable(HeaderReadWriteUtils::readCodePointTable(&mAttributeMap)) {}

    // Constructs header information using an attribute map.
    HeaderPolicy(const FormatUtils::FORMAT_VERSION dictFormatVersion,
@@ -97,7 +98,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
              mMaxUnigramCount(HeaderReadWriteUtils::readIntAttributeValue(
                      &mAttributeMap, MAX_UNIGRAM_COUNT_KEY, DEFAULT_MAX_UNIGRAM_COUNT)),
              mMaxBigramCount(HeaderReadWriteUtils::readIntAttributeValue(
                      &mAttributeMap, MAX_BIGRAM_COUNT_KEY, DEFAULT_MAX_BIGRAM_COUNT)) {}
                      &mAttributeMap, MAX_BIGRAM_COUNT_KEY, DEFAULT_MAX_BIGRAM_COUNT)),
              mCodePointTable(HeaderReadWriteUtils::readCodePointTable(&mAttributeMap)) {}

    // Copy header information
    HeaderPolicy(const HeaderPolicy *const headerPolicy)
@@ -118,7 +120,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
              mForgettingCurveDurationToLevelDown(
                      headerPolicy->mForgettingCurveDurationToLevelDown),
              mMaxUnigramCount(headerPolicy->mMaxUnigramCount),
              mMaxBigramCount(headerPolicy->mMaxBigramCount) {}
              mMaxBigramCount(headerPolicy->mMaxBigramCount),
              mCodePointTable(headerPolicy->mCodePointTable) {}

    // Temporary dummy header.
    HeaderPolicy()
@@ -128,7 +131,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
              mDate(0), mLastDecayedTime(0), mUnigramCount(0), mBigramCount(0),
              mExtendedRegionSize(0), mHasHistoricalInfoOfWords(false),
              mForgettingCurveOccurrencesToLevelUp(0), mForgettingCurveProbabilityValuesTableId(0),
              mForgettingCurveDurationToLevelDown(0), mMaxUnigramCount(0), mMaxBigramCount(0) {}
              mForgettingCurveDurationToLevelDown(0), mMaxUnigramCount(0), mMaxBigramCount(0),
              mCodePointTable(nullptr) {}

    ~HeaderPolicy() {}

@@ -139,6 +143,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
        switch (mDictFormatVersion) {
            case FormatUtils::VERSION_2:
                return FormatUtils::VERSION_2;
            case FormatUtils::VERSION_201:
                return FormatUtils::VERSION_201;
            case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
                return FormatUtils::VERSION_4_ONLY_FOR_TESTING;
            case FormatUtils::VERSION_4:
@@ -250,6 +256,10 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
        return mDictFormatVersion >= FormatUtils::VERSION_4;
    }

    const int *getCodePointTable() const {
        return mCodePointTable;
    }

 private:
    DISALLOW_COPY_AND_ASSIGN(HeaderPolicy);

@@ -295,6 +305,7 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
    const int mForgettingCurveDurationToLevelDown;
    const int mMaxUnigramCount;
    const int mMaxBigramCount;
    const int *const mCodePointTable;

    const std::vector<int> readLocale() const;
    float readMultipleWordCostMultiplier() const;
+21 −6
Original line number Diff line number Diff line
@@ -18,6 +18,7 @@

#include <cctype>
#include <cstdio>
#include <memory>
#include <vector>

#include "defines.h"
@@ -34,12 +35,13 @@ namespace latinime {
const int HeaderReadWriteUtils::LARGEST_INT_DIGIT_COUNT = 11;

const int HeaderReadWriteUtils::MAX_ATTRIBUTE_KEY_LENGTH = 256;
const int HeaderReadWriteUtils::MAX_ATTRIBUTE_VALUE_LENGTH = 256;
const int HeaderReadWriteUtils::MAX_ATTRIBUTE_VALUE_LENGTH = 2048;

const int HeaderReadWriteUtils::HEADER_MAGIC_NUMBER_SIZE = 4;
const int HeaderReadWriteUtils::HEADER_DICTIONARY_VERSION_SIZE = 2;
const int HeaderReadWriteUtils::HEADER_FLAG_SIZE = 2;
const int HeaderReadWriteUtils::HEADER_SIZE_FIELD_SIZE = 4;
const char *const HeaderReadWriteUtils::CODE_POINT_TABLE_KEY = "codePointTable";

const HeaderReadWriteUtils::DictionaryFlags HeaderReadWriteUtils::NO_FLAGS = 0;

@@ -73,20 +75,32 @@ typedef DictionaryHeaderStructurePolicy::AttributeMap AttributeMap;
        return;
    }
    int keyBuffer[MAX_ATTRIBUTE_KEY_LENGTH];
    int valueBuffer[MAX_ATTRIBUTE_VALUE_LENGTH];
    std::unique_ptr<int[]> valueBuffer(new int[MAX_ATTRIBUTE_VALUE_LENGTH]);
    while (pos < headerSize) {
        // The values in the header don't use the code point table for their encoding.
        const int keyLength = ByteArrayUtils::readStringAndAdvancePosition(dictBuf,
                MAX_ATTRIBUTE_KEY_LENGTH, keyBuffer, &pos);
                MAX_ATTRIBUTE_KEY_LENGTH, nullptr /* codePointTable */, keyBuffer, &pos);
        std::vector<int> key;
        key.insert(key.end(), keyBuffer, keyBuffer + keyLength);
        const int valueLength = ByteArrayUtils::readStringAndAdvancePosition(dictBuf,
                MAX_ATTRIBUTE_VALUE_LENGTH, valueBuffer, &pos);
                MAX_ATTRIBUTE_VALUE_LENGTH, nullptr /* codePointTable */, valueBuffer.get(), &pos);
        std::vector<int> value;
        value.insert(value.end(), valueBuffer, valueBuffer + valueLength);
        value.insert(value.end(), valueBuffer.get(), valueBuffer.get() + valueLength);
        headerAttributes->insert(AttributeMap::value_type(key, value));
    }
}

/* static */ const int *HeaderReadWriteUtils::readCodePointTable(
        AttributeMap *const headerAttributes) {
    AttributeMap::key_type keyVector;
    insertCharactersIntoVector(CODE_POINT_TABLE_KEY, &keyVector);
    AttributeMap::const_iterator it = headerAttributes->find(keyVector);
    if (it == headerAttributes->end()) {
        return nullptr;
    }
    return it->second.data();
}

/* static */ bool HeaderReadWriteUtils::writeDictionaryVersion(
        BufferWithExtendableBuffer *const buffer, const FormatUtils::FORMAT_VERSION version,
        int *const writingPos) {
@@ -96,7 +110,8 @@ typedef DictionaryHeaderStructurePolicy::AttributeMap AttributeMap;
    }
    switch (version) {
        case FormatUtils::VERSION_2:
            // Version 2 dictionary writing is not supported.
        case FormatUtils::VERSION_201:
            // Version 2 or 201 dictionary writing is not supported.
            return false;
        case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
        case FormatUtils::VERSION_4:
+5 −0
Original line number Diff line number Diff line
@@ -46,6 +46,9 @@ class HeaderReadWriteUtils {
    static void fetchAllHeaderAttributes(const uint8_t *const dictBuf,
            DictionaryHeaderStructurePolicy::AttributeMap *const headerAttributes);

    static const int *readCodePointTable(
            DictionaryHeaderStructurePolicy::AttributeMap *const headerAttributes);

    static bool writeDictionaryVersion(BufferWithExtendableBuffer *const buffer,
            const FormatUtils::FORMAT_VERSION version, int *const writingPos);

@@ -101,6 +104,8 @@ class HeaderReadWriteUtils {
    static const int HEADER_FLAG_SIZE;
    static const int HEADER_SIZE_FIELD_SIZE;

    static const char *const CODE_POINT_TABLE_KEY;

    // Value for the "flags" field. It's unused at the moment.
    static const DictionaryFlags NO_FLAGS;

+4 −3
Original line number Diff line number Diff line
@@ -23,6 +23,7 @@

#include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.h"

#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/structure/backward/v402/content/probability_dict_content.h"
@@ -59,8 +60,8 @@ const PtNodeParams Ver4PatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProce
    const int parentPos =
            DynamicPtReadingUtils::getParentPtNodePos(parentPosOffset, headPos);
    int codePoints[MAX_WORD_LENGTH];
    const int codePonitCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
            dictBuf, flags, MAX_WORD_LENGTH, codePoints, &pos);
    const int codePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
            dictBuf, flags, MAX_WORD_LENGTH, mHeaderPolicy->getCodePointTable(), codePoints, &pos);
    int terminalIdFieldPos = NOT_A_DICT_POS;
    int terminalId = Ver4DictConstants::NOT_A_TERMINAL_ID;
    int probability = NOT_A_PROBABILITY;
@@ -98,7 +99,7 @@ const PtNodeParams Ver4PatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProce
        // The destination position is stored at the same place as the parent position.
        return fetchPtNodeInfoFromBufferAndProcessMovedPtNode(parentPos, newSiblingNodePos);
    } else {
        return PtNodeParams(headPos, flags, parentPos, codePonitCount, codePoints,
        return PtNodeParams(headPos, flags, parentPos, codePointCount, codePoints,
                terminalIdFieldPos, terminalId, probability, childrenPosFieldPos, childrenPos,
                newSiblingNodePos);
    }
+3 −1
Original line number Diff line number Diff line
@@ -114,7 +114,8 @@ template<class DictConstants, class DictBuffers, class DictBuffersPtr, class Str
            mmappedBuffer->getReadOnlyByteArrayView());
    switch (formatVersion) {
        case FormatUtils::VERSION_2:
            AKLOGE("Given path is a directory but the format is version 2. path: %s", path);
        case FormatUtils::VERSION_201:
            AKLOGE("Given path is a directory but the format is version 2 or 201. path: %s", path);
            break;
        case FormatUtils::VERSION_4: {
            return newPolicyForV4Dict<backward::v402::Ver4DictConstants,
@@ -175,6 +176,7 @@ template<class DictConstants, class DictBuffers, class DictBuffersPtr, class Str
    }
    switch (FormatUtils::detectFormatVersion(mmappedBuffer->getReadOnlyByteArrayView())) {
        case FormatUtils::VERSION_2:
        case FormatUtils::VERSION_201:
            return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(
                    new PatriciaTriePolicy(std::move(mmappedBuffer)));
        case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
Loading