Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 1e275292 authored by Keisuke Kuroyanagi's avatar Keisuke Kuroyanagi
Browse files

Create Ver2ParticiaTrieNodeReader.

Bug: 12810574

Change-Id: I7d3298b5f419d557755ae433c8b8cc0d145f4cc3
parent 1d6afa17
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -57,7 +57,8 @@ LATIN_IME_CORE_SRC_FILES := \
        dynamic_pt_writing_utils.cpp) \
    $(addprefix suggest/policyimpl/dictionary/structure/v2/, \
        patricia_trie_policy.cpp \
        patricia_trie_reading_utils.cpp) \
        patricia_trie_reading_utils.cpp \
        ver2_patricia_trie_node_reader.cpp) \
    $(addprefix suggest/policyimpl/dictionary/structure/v4/, \
        ver4_dict_buffers.cpp \
        ver4_dict_constants.cpp \
+15 −0
Original line number Diff line number Diff line
@@ -53,6 +53,21 @@ class PtNodeParams {
        memcpy(mCodePoints, ptNodeParams.getCodePoints(), sizeof(int) * mCodePointCount);
    }

    // PtNode read from version 2 dictionary.
    //
    // Builds the params from the values supplied by the caller: head position, flags, code
    // points, probability, children position, shortcut position, bigram position and sibling
    // position. Every field this constructor has no argument for (parent position, terminal id
    // and its field position, probability field position, children position field position and
    // bigram linked node position) is set to its "not available" sentinel.
    //
    // codePoints must reference at least codePointCount readable ints; they are copied into
    // mCodePoints, so the caller's buffer does not need to outlive this object.
    // NOTE(review): the copy length is not checked here -- assumes codePointCount is
    // non-negative and does not exceed the capacity of mCodePoints (declaration not visible in
    // this hunk); confirm against the callers.
    PtNodeParams(const int headPos, const PatriciaTrieReadingUtils::NodeFlags flags,
            const int codePointCount, const int *const codePoints, const int probability,
            const int childrenPos, const int shortcutPos, const int bigramPos,
            const int siblingPos)
            : mHeadPos(headPos), mFlags(flags), mParentPos(NOT_A_DICT_POS),
              mCodePointCount(codePointCount), mCodePoints(), mTerminalIdFieldPos(NOT_A_DICT_POS),
              mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID),
              mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(probability),
              mChildrenPosFieldPos(NOT_A_DICT_POS), mChildrenPos(childrenPos),
              mBigramLinkedNodePos(NOT_A_DICT_POS), mShortcutPos(shortcutPos),
              mBigramPos(bigramPos), mSiblingPos(siblingPos) {
        // mCodePoints was value-initialized above; overwrite its first mCodePointCount entries.
        memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount);
    }

    // PtNode with a terminal id.
    PtNodeParams(const int headPos, const PatriciaTrieReadingUtils::NodeFlags flags,
            const int parentPos, const int codePointCount, const int *const codePoints,
+18 −67
Original line number Diff line number Diff line
@@ -336,99 +336,50 @@ int PatriciaTriePolicy::getUnigramProbabilityOfPtNode(const int ptNodePos) const
    if (ptNodePos == NOT_A_DICT_POS) {
        return NOT_A_PROBABILITY;
    }
    int pos = ptNodePos;
    const PatriciaTrieReadingUtils::NodeFlags flags =
            PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
    if (!PatriciaTrieReadingUtils::isTerminal(flags)) {
        return NOT_A_PROBABILITY;
    }
    if (PatriciaTrieReadingUtils::isNotAWord(flags)
            || PatriciaTrieReadingUtils::isBlacklisted(flags)) {
    const PtNodeParams ptNodeParams = mPtNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos);
    if (ptNodeParams.isNotAWord() || ptNodeParams.isBlacklisted()) {
        // If this is not a word, or if it's a blacklisted entry, it should behave as
        // having no probability outside of the suggestion process (where it should be used
        // for shortcuts).
        return NOT_A_PROBABILITY;
    }
    PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos);
    return getProbability(PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(
            mDictRoot, &pos), NOT_A_PROBABILITY);
    return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
}

int PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
    if (ptNodePos == NOT_A_DICT_POS) {
        return NOT_A_DICT_POS;
    }
    int pos = ptNodePos;
    const PatriciaTrieReadingUtils::NodeFlags flags =
            PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
    if (!PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
        return NOT_A_DICT_POS;
    }
    PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos);
    if (PatriciaTrieReadingUtils::isTerminal(flags)) {
        PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
    }
    if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
        PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &pos);
    }
    return pos;
    return mPtNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos).getShortcutPos();
}

int PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
    if (ptNodePos == NOT_A_DICT_POS) {
        return NOT_A_DICT_POS;
    }
    int pos = ptNodePos;
    const PatriciaTrieReadingUtils::NodeFlags flags =
            PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
    if (!PatriciaTrieReadingUtils::hasBigrams(flags)) {
        return NOT_A_DICT_POS;
    }
    PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos);
    if (PatriciaTrieReadingUtils::isTerminal(flags)) {
        PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
    }
    if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
        PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &pos);
    }
    if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
        mShortcutListPolicy.skipAllShortcuts(&pos);;
    }
    return pos;
    return mPtNodeReader.fetchNodeInfoInBufferFromPtNodePos(ptNodePos).getBigramsPos();
}

int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNode,
        const int ptNodePos, DicNodeVector *childDicNodes) const {
    int pos = ptNodePos;
    const PatriciaTrieReadingUtils::NodeFlags flags =
            PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
    PatriciaTrieReadingUtils::NodeFlags flags;
    int mergedNodeCodePointCount = 0;
    int mergedNodeCodePoints[MAX_WORD_LENGTH];
    const int mergedNodeCodePointCount = PatriciaTrieReadingUtils::getCharsAndAdvancePosition(
            mDictRoot, flags, MAX_WORD_LENGTH, mergedNodeCodePoints, &pos);
    const int probability = (PatriciaTrieReadingUtils::isTerminal(flags))?
            PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos)
                    : NOT_A_PROBABILITY;
    const int childrenPos = PatriciaTrieReadingUtils::hasChildrenInFlags(flags) ?
            PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
                    mDictRoot, flags, &pos) : NOT_A_DICT_POS;
    if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
        getShortcutsStructurePolicy()->skipAllShortcuts(&pos);
    }
    if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
        getBigramsStructurePolicy()->skipAllBigrams(&pos);
    }
    if (mergedNodeCodePointCount <= 0) {
        AKLOGE("Empty PtNode is not allowed. Code point count: %d", mergedNodeCodePointCount);
        ASSERT(false);
        return pos;
    }
    int probability = NOT_A_PROBABILITY;
    int childrenPos = NOT_A_DICT_POS;
    int shortcutPos = NOT_A_DICT_POS;
    int bigramPos = NOT_A_DICT_POS;
    int siblingPos = NOT_A_DICT_POS;
    PatriciaTrieReadingUtils::readPtNodeInfo(mDictRoot, ptNodePos, getShortcutsStructurePolicy(),
            getBigramsStructurePolicy(), &flags, &mergedNodeCodePointCount, mergedNodeCodePoints,
            &probability, &childrenPos, &shortcutPos, &bigramPos, &siblingPos);
    childDicNodes->pushLeavingChild(dicNode, ptNodePos, childrenPos, probability,
            PatriciaTrieReadingUtils::isTerminal(flags),
            PatriciaTrieReadingUtils::hasChildrenInFlags(flags),
            PatriciaTrieReadingUtils::isBlacklisted(flags) ||
                    PatriciaTrieReadingUtils::isNotAWord(flags),
            PatriciaTrieReadingUtils::isBlacklisted(flags)
                    || PatriciaTrieReadingUtils::isNotAWord(flags),
            mergedNodeCodePointCount, mergedNodeCodePoints);
    return pos;
    return siblingPos;
}

} // namespace latinime
+4 −1
Original line number Diff line number Diff line
@@ -24,6 +24,7 @@
#include "suggest/policyimpl/dictionary/bigram/bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/shortcut/shortcut_list_policy.h"
#include "suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"

@@ -40,7 +41,8 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
              mDictRoot(mMmappedBuffer.get()->getBuffer() + mHeaderPolicy.getSize()),
              mDictBufferSize(mMmappedBuffer.get()->getBufferSize()
                      - mHeaderPolicy.getSize()),
              mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot) {}
              mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot),
              mPtNodeReader(mDictRoot, mDictBufferSize, &mBigramListPolicy, &mShortcutListPolicy) {}

    AK_FORCE_INLINE int getRootPosition() const {
        return 0;
@@ -143,6 +145,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
    const int mDictBufferSize;
    const BigramListPolicy mBigramListPolicy;
    const ShortcutListPolicy mShortcutListPolicy;
    const Ver2ParticiaTrieNodeReader mPtNodeReader;

    int createAndGetLeavingChildNode(const DicNode *const dicNode, const int ptNodePos,
            DicNodeVector *const childDicNodes) const;
+30 −0
Original line number Diff line number Diff line
@@ -17,6 +17,8 @@
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_reading_utils.h"

#include "defines.h"
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
#include "suggest/policyimpl/dictionary/utils/byte_array_utils.h"

namespace latinime {
@@ -130,4 +132,32 @@ const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_BLACKLISTED = 0x01;
    return base + offset;
}

/* static */ void PtReadingUtils::readPtNodeInfo(const uint8_t *const dictBuf, const int ptNodePos,
        const DictionaryShortcutsStructurePolicy *const shortcutPolicy,
        const DictionaryBigramsStructurePolicy *const bigramPolicy,
        NodeFlags *const outFlags, int *const outCodePointCount, int *const outCodePoint,
        int *const outProbability, int *const outChildrenPos, int *const outShortcutPos,
        int *const outBigramPos, int *const outSiblingPos) {
    int readingPos = ptNodePos;
    const NodeFlags flags = getFlagsAndAdvancePosition(dictBuf, &readingPos);
    *outFlags = flags;
    *outCodePointCount = getCharsAndAdvancePosition(
            dictBuf, flags, MAX_WORD_LENGTH, outCodePoint, &readingPos);
    *outProbability = isTerminal(flags) ?
            readProbabilityAndAdvancePosition(dictBuf, &readingPos) : NOT_A_PROBABILITY;
    *outChildrenPos = hasChildrenInFlags(flags) ?
            readChildrenPositionAndAdvancePosition(dictBuf, flags, &readingPos) : NOT_A_DICT_POS;
    *outShortcutPos = NOT_A_DICT_POS;
    if (hasShortcutTargets(flags)) {
        *outShortcutPos = readingPos;
        shortcutPolicy->skipAllShortcuts(&readingPos);
    }
    *outBigramPos = NOT_A_DICT_POS;
    if (hasBigrams(flags)) {
        *outBigramPos = readingPos;
        bigramPolicy->skipAllBigrams(&readingPos);
    }
    *outSiblingPos = readingPos;
}

} // namespace latinime
Loading