Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 7c73e0f2 authored by Keisuke Kuroyanagi's avatar Keisuke Kuroyanagi Committed by Android (Google) Code Review
Browse files

Merge "Create Ver2ParticiaTrieNodeReader."

parents 0d2df2ac 1e275292
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -57,7 +57,8 @@ LATIN_IME_CORE_SRC_FILES := \
        dynamic_pt_writing_utils.cpp) \
    $(addprefix suggest/policyimpl/dictionary/structure/v2/, \
        patricia_trie_policy.cpp \
        patricia_trie_reading_utils.cpp) \
        patricia_trie_reading_utils.cpp \
        ver2_patricia_trie_node_reader.cpp) \
    $(addprefix suggest/policyimpl/dictionary/structure/v4/, \
        ver4_dict_buffers.cpp \
        ver4_dict_constants.cpp \
+15 −0
Original line number Diff line number Diff line
@@ -53,6 +53,21 @@ class PtNodeParams {
        memcpy(mCodePoints, ptNodeParams.getCodePoints(), sizeof(int) * mCodePointCount);
    }

    // Constructs the parameters of a PtNode that was read from a version 2 dictionary.
    // Only the values handed in by the caller are populated. The parent position, the
    // terminal id and every "field position" member are set to their NOT_A_DICT_POS /
    // NOT_A_TERMINAL_ID sentinels. The first mCodePointCount entries of codePoints are
    // copied into mCodePoints.
    PtNodeParams(const int headPos, const PatriciaTrieReadingUtils::NodeFlags flags,
            const int codePointCount, const int *const codePoints, const int probability,
            const int childrenPos, const int shortcutPos, const int bigramPos,
            const int siblingPos)
            : mHeadPos(headPos),
              mFlags(flags),
              mParentPos(NOT_A_DICT_POS),
              mCodePointCount(codePointCount),
              mCodePoints(),
              mTerminalIdFieldPos(NOT_A_DICT_POS),
              mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID),
              mProbabilityFieldPos(NOT_A_DICT_POS),
              mProbability(probability),
              mChildrenPosFieldPos(NOT_A_DICT_POS),
              mChildrenPos(childrenPos),
              mBigramLinkedNodePos(NOT_A_DICT_POS),
              mShortcutPos(shortcutPos),
              mBigramPos(bigramPos),
              mSiblingPos(siblingPos) {
        // mCodePoints was value-initialized above; fill its head with this node's code points.
        memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount);
    }

    // PtNode with a terminal id.
    PtNodeParams(const int headPos, const PatriciaTrieReadingUtils::NodeFlags flags,
            const int parentPos, const int codePointCount, const int *const codePoints,
+18 −67
Original line number Diff line number Diff line
@@ -336,99 +336,50 @@ int PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos) const
    if (ptNodePos == NOT_A_DICT_POS) {
        return NOT_A_PROBABILITY;
    }
    int pos = ptNodePos;
    const PatriciaTrieReadingUtils::NodeFlags flags =
            PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
    if (!PatriciaTrieReadingUtils::isTerminal(flags)) {
        return NOT_A_PROBABILITY;
    }
    if (PatriciaTrieReadingUtils::isNotAWord(flags)
            || PatriciaTrieReadingUtils::isBlacklisted(flags)) {
    const PtNodeParams ptNodeParams = mPtNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
    if (ptNodeParams.isNotAWord() || ptNodeParams.isBlacklisted()) {
        // If this is not a word, or if it's a blacklisted entry, it should behave as
        // having no probability outside of the suggestion process (where it should be used
        // for shortcuts).
        return NOT_A_PROBABILITY;
    }
    PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos);
    return getProbability(PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(
            mDictRoot, &pos), NOT_A_PROBABILITY);
    return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
}

// Returns the position of the shortcut list of the PtNode at ptNodePos, or NOT_A_DICT_POS when
// ptNodePos itself is NOT_A_DICT_POS.
// NOTE(review): this listing appears to interleave the removed and the added lines of the diff
// without +/- markers (note the two consecutive return statements). The manual walk over the
// node's fields looks like the pre-change body and the single call into mPtNodeReader looks
// like its replacement -- confirm against the actual commit.
int PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
    if (ptNodePos == NOT_A_DICT_POS) {
        return NOT_A_DICT_POS;
    }
    // Manual walk: read the flags, then skip every field that precedes the shortcut list.
    int pos = ptNodePos;
    const PatriciaTrieReadingUtils::NodeFlags flags =
            PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
    if (!PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
        return NOT_A_DICT_POS;
    }
    PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos);
    if (PatriciaTrieReadingUtils::isTerminal(flags)) {
        // The value is discarded; the call is made only to advance pos.
        PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
    }
    if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
        // The value is discarded; the call is made only to advance pos.
        PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &pos);
    }
    return pos;
    // Reader-based form: fetch the node's parameters and return the recorded shortcut position.
    return mPtNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos).getShortcutPos();
}

// Returns the position of the bigram list of the PtNode at ptNodePos, or NOT_A_DICT_POS when
// ptNodePos itself is NOT_A_DICT_POS.
// NOTE(review): this listing appears to interleave the removed and the added lines of the diff
// without +/- markers (note the two consecutive return statements). The manual walk over the
// node's fields looks like the pre-change body and the single call into mPtNodeReader looks
// like its replacement -- confirm against the actual commit.
int PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
    if (ptNodePos == NOT_A_DICT_POS) {
        return NOT_A_DICT_POS;
    }
    // Manual walk: read the flags, then skip every field that precedes the bigram list.
    int pos = ptNodePos;
    const PatriciaTrieReadingUtils::NodeFlags flags =
            PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
    if (!PatriciaTrieReadingUtils::hasBigrams(flags)) {
        return NOT_A_DICT_POS;
    }
    PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos);
    if (PatriciaTrieReadingUtils::isTerminal(flags)) {
        // The value is discarded; the call is made only to advance pos.
        PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
    }
    if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
        // The value is discarded; the call is made only to advance pos.
        PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &pos);
    }
    if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
        // The shortcut list is skipped here, before the bigram position is returned.
        mShortcutListPolicy.skipAllShortcuts(&pos);;
    }
    return pos;
    // Reader-based form: fetch the node's parameters and return the recorded bigram position.
    return mPtNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos).getBigramsPos();
}

// Reads the PtNode at ptNodePos, pushes it into childDicNodes as a leaving child of dicNode, and
// returns the reading position just past that PtNode.
// NOTE(review): this listing appears to interleave the removed and the added lines of the diff
// without +/- markers (flags, probability, childrenPos and mergedNodeCodePointCount are each
// declared twice, and there are two final return statements). The field-by-field reads look
// like the pre-change body; the block that declares defaults and calls
// PatriciaTrieReadingUtils::readPtNodeInfo looks like its replacement -- confirm against the
// actual commit.
int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNode,
        const int ptNodePos, DicNodeVector *childDicNodes) const {
    int pos = ptNodePos;
    const PatriciaTrieReadingUtils::NodeFlags flags =
            PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
    PatriciaTrieReadingUtils::NodeFlags flags;
    int mergedNodeCodePointCount = 0;
    int mergedNodeCodePoints[MAX_WORD_LENGTH];
    const int mergedNodeCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
            mDictRoot, flags, MAX_WORD_LENGTH, mergedNodeCodePoints, &pos);
    const int probability = (PatriciaTrieReadingUtils::isTerminal(flags))?
            PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos)
                    : NOT_A_PROBABILITY;
    const int childrenPos = PatriciaTrieReadingUtils::hasChildrenInFlags(flags) ?
            PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
                    mDictRoot, flags, &pos) : NOT_A_DICT_POS;
    // The shortcut and bigram lists are only skipped so that pos ends up past the PtNode.
    if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
        getShortcutsStructurePolicy()->skipAllShortcuts(&pos);
    }
    if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
        getBigramsStructurePolicy()->skipAllBigrams(&pos);
    }
    // A PtNode without code points is reported and not pushed to childDicNodes.
    if (mergedNodeCodePointCount <= 0) {
        AKLOGE("Empty PtNode is not allowed. Code point count: %d", mergedNodeCodePointCount);
        ASSERT(false);
        return pos;
    }
    // Defaults for the out-parameters handed to readPtNodeInfo below.
    int probability = NOT_A_PROBABILITY;
    int childrenPos = NOT_A_DICT_POS;
    int shortcutPos = NOT_A_DICT_POS;
    int bigramPos = NOT_A_DICT_POS;
    int siblingPos = NOT_A_DICT_POS;
    PatriciaTrieReadingUtils::readPtNodeInfo(mDictRoot, ptNodePos, getShortcutsStructurePolicy(),
            getBigramsStructurePolicy(), &flags, &mergedNodeCodePointCount, mergedNodeCodePoints,
            &probability, &childrenPos, &shortcutPos, &bigramPos, &siblingPos);
    // The "blacklisted or not a word" argument appears twice below with different line breaks;
    // presumably one is the removed line and the other the added one.
    childDicNodes->pushLeavingChild(dicNode, ptNodePos, childrenPos, probability,
            PatriciaTrieReadingUtils::isTerminal(flags),
            PatriciaTrieReadingUtils::hasChildrenInFlags(flags),
            PatriciaTrieReadingUtils::isBlacklisted(flags) ||
                    PatriciaTrieReadingUtils::isNotAWord(flags),
            PatriciaTrieReadingUtils::isBlacklisted(flags)
                    || PatriciaTrieReadingUtils::isNotAWord(flags),
            mergedNodeCodePointCount, mergedNodeCodePoints);
    return pos;
    return siblingPos;
}

} // namespace latinime
+4 −1
Original line number Diff line number Diff line
@@ -24,6 +24,7 @@
#include "suggest/policyimpl/dictionary/bigram/bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h"
#include "suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"

@@ -40,7 +41,8 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
              mDictRoot(mMmappedBuffer.get()->getBuffer() + mHeaderPolicy.getSize()),
              mDictBufferSize(mMmappedBuffer.get()->getBufferSize()
                      - mHeaderPolicy.getSize()),
              mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot) {}
              mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot),
              mPtNodeReader(mDictRoot, mDictBufferSize, &mBigramListPolicy, &mShortcutListPolicy) {}

    AK_FORCE_INLINE int getRootPosition() const {
        return 0;
@@ -143,6 +145,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
    const int mDictBufferSize;
    const BigramListPolicy mBigramListPolicy;
    const ShortcutListPolicy mShortcutListPolicy;
    const Ver2ParticiaTrieNodeReader mPtNodeReader;

    int createAndGetLeavingChildNode(const DicNode *const dicNode, const int ptNodePos,
            DicNodeVector *const childDicNodes) const;
+30 −0
Original line number Diff line number Diff line
@@ -17,6 +17,8 @@
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"

#include "defines.h"
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"

namespace latinime {
@@ -130,4 +132,32 @@ const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_BLACKLISTED = 0x01;
    return base + offset;
}

/* static */ void PtReadingUtils::readPtNodeInfo(const uint8_t *const dictBuf, const int ptNodePos,
        const DictionaryShortcutsStructurePolicy *const shortcutPolicy,
        const DictionaryBigramsStructurePolicy *const bigramPolicy,
        NodeFlags *const outFlags, int *const outCodePointCount, int *const outCodePoint,
        int *const outProbability, int *const outChildrenPos, int *const outShortcutPos,
        int *const outBigramPos, int *const outSiblingPos) {
    int readingPos = ptNodePos;
    const NodeFlags flags = getFlagsAndAdvancePosition(dictBuf, &readingPos);
    *outFlags = flags;
    *outCodePointCount = getCharsAndAdvancePosition(
            dictBuf, flags, MAX_WORD_LENGTH, outCodePoint, &readingPos);
    *outProbability = isTerminal(flags) ?
            readProbabilityAndAdvancePosition(dictBuf, &readingPos) : NOT_A_PROBABILITY;
    *outChildrenPos = hasChildrenInFlags(flags) ?
            readChildrenPositionAndAdvancePosition(dictBuf, flags, &readingPos) : NOT_A_DICT_POS;
    *outShortcutPos = NOT_A_DICT_POS;
    if (hasShortcutTargets(flags)) {
        *outShortcutPos = readingPos;
        shortcutPolicy->skipAllShortcuts(&readingPos);
    }
    *outBigramPos = NOT_A_DICT_POS;
    if (hasBigrams(flags)) {
        *outBigramPos = readingPos;
        bigramPolicy->skipAllBigrams(&readingPos);
    }
    *outSiblingPos = readingPos;
}

} // namespace latinime
Loading