Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit c5e6efaf authored by Keisuke Kuroynagi
Browse files

Introduce patriciaTrie to abstract traversing version 2 dictionary.

Bug: 6669677
Change-Id: Ifef72f3d7a7ba67c5232b98c7835485d72d7322d
parent e04794cb
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -70,6 +70,7 @@ LATIN_IME_CORE_SRC_FILES := \
        proximity_info_state_utils.cpp) \
    suggest/core/policy/weighting.cpp \
    suggest/core/session/dic_traverse_session.cpp \
    suggest/policyimpl/dictionary/patricia_trie_policy.cpp \
    suggest/policyimpl/gesture/gesture_suggest_policy_factory.cpp \
    $(addprefix suggest/policyimpl/typing/, \
        scoring_params.cpp \
+4 −2
Original line number Diff line number Diff line
@@ -26,6 +26,7 @@
#include "suggest/core/dictionary/probability_utils.h"
#include "suggest/core/layout/proximity_info.h"
#include "suggest/core/layout/proximity_info_state.h"
#include "suggest/core/policy/dictionary_structure_policy.h"
#include "utils/char_utils.h"

namespace latinime {
@@ -36,14 +37,15 @@ namespace latinime {

// Initializes newRootNode as a root node by passing a root position and prevWordNodePos to
// DicNode::initAsRoot.
// NOTE(review): this listing is a diff view whose +/- markers were stripped. The first call
// below appears to be the removed line (root position read directly from BinaryDictionaryInfo)
// and the second its replacement (root position obtained through the structure policy).
/* static */ void DicNodeUtils::initAsRoot(const BinaryDictionaryInfo *const binaryDictionaryInfo,
        const int prevWordNodePos, DicNode *const newRootNode) {
    newRootNode->initAsRoot(binaryDictionaryInfo->getRootPosition(), prevWordNodePos);
    newRootNode->initAsRoot(binaryDictionaryInfo->getStructurePolicy()->getRootPosition(),
            prevWordNodePos);
}

// Initializes newRootNode as a root node that follows a previous word, passing prevWordLastNode
// and a root position to DicNode::initAsRootWithPreviousWord.
// NOTE(review): this listing is a diff view whose +/- markers were stripped. The two argument
// lines below appear to be the removed version (getRootPosition() on BinaryDictionaryInfo)
// followed by its replacement (getRootPosition() on the structure policy); only one of them
// belongs to any single revision of the file.
/*static */ void DicNodeUtils::initAsRootWithPreviousWord(
        const BinaryDictionaryInfo *const binaryDictionaryInfo,
        DicNode *const prevWordLastNode, DicNode *const newRootNode) {
    newRootNode->initAsRootWithPreviousWord(
            prevWordLastNode, binaryDictionaryInfo->getRootPosition());
            prevWordLastNode, binaryDictionaryInfo->getStructurePolicy()->getRootPosition());
}

/* static */ void DicNodeUtils::initByCopy(DicNode *srcNode, DicNode *destNode) {
+5 −6
Original line number Diff line number Diff line
@@ -150,11 +150,10 @@ int BigramDictionary::getPredictions(const int *prevWord, int prevWordLength, in
int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const int prevWordLength,
        const bool forceLowerCaseSearch) const {
    if (0 >= prevWordLength) return 0;
    const uint8_t *const root = mBinaryDictionaryInfo->getDictRoot();
    int pos = BinaryFormat::getTerminalPosition(root, prevWord, prevWordLength,
            forceLowerCaseSearch);

    int pos = mBinaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord(
            mBinaryDictionaryInfo, prevWord, prevWordLength, forceLowerCaseSearch);
    if (NOT_VALID_WORD == pos) return 0;
    const uint8_t *const root = mBinaryDictionaryInfo->getDictRoot();
    const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
    if (0 == (flags & BinaryFormat::FLAG_HAS_BIGRAMS)) return 0;
    if (0 == (flags & BinaryFormat::FLAG_HAS_MULTIPLE_CHARS)) {
@@ -189,8 +188,8 @@ bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *w
    int pos = getBigramListPositionForWord(word0, length0, false /* forceLowerCaseSearch */);
    // getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams
    if (0 == pos) return false;
    int nextWordPos = BinaryFormat::getTerminalPosition(mBinaryDictionaryInfo->getDictRoot(),
            word1, length1, false /* forceLowerCaseSearch */);
    int nextWordPos = mBinaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord(
            mBinaryDictionaryInfo, word1, length1, false /* forceLowerCaseSearch */);
    if (NOT_VALID_WORD == nextWordPos) return false;

    for (BinaryDictionaryBigramsIterator bigramsIt(mBinaryDictionaryInfo, pos);
+9 −7
Original line number Diff line number Diff line
@@ -22,11 +22,10 @@
#include "defines.h"
#include "suggest/core/dictionary/binary_dictionary_format_utils.h"
#include "suggest/core/dictionary/binary_dictionary_header.h"
#include "suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h"

namespace latinime {

class BinaryDictionaryHeader;

class BinaryDictionaryInfo {
 public:
    BinaryDictionaryInfo(const uint8_t *const dictBuf, const int dictSize, const int mmapFd,
@@ -35,7 +34,9 @@ class BinaryDictionaryInfo {
              mDictBufOffset(dictBufOffset), mIsUpdatable(isUpdatable),
              mDictionaryFormat(BinaryDictionaryFormatUtils::detectFormatVersion(
                      mDictBuf, mDictSize)),
              mDictionaryHeader(this), mDictRoot(mDictBuf + mDictionaryHeader.getSize()) {}
              mDictionaryHeader(this), mDictRoot(mDictBuf + mDictionaryHeader.getSize()),
              mStructurePolicy(DictionaryStructurePolicyFactory::getDictionaryStructurePolicy(
                      mDictionaryFormat)) {}

    AK_FORCE_INLINE const uint8_t *getDictBuf() const {
        return mDictBuf;
@@ -61,10 +62,6 @@ class BinaryDictionaryInfo {
        return mDictionaryFormat;
    }

    // Returns the root position, which is the constant 0 in this implementation.
    // NOTE(review): in this diff view the accessor appears to be on the removed side of the
    // hunk; callers shown elsewhere in the commit use getStructurePolicy()->getRootPosition().
    AK_FORCE_INLINE int getRootPosition() const {
        return 0;
    }

    // Returns a pointer to the mDictionaryHeader member; the pointer refers to storage inside
    // this BinaryDictionaryInfo object.
    AK_FORCE_INLINE const BinaryDictionaryHeader *getHeader() const {
        return &mDictionaryHeader;
    }
@@ -75,6 +72,10 @@ class BinaryDictionaryInfo {
        return mIsUpdatable && isUpdatableDictionaryFormat;
    }

    // Returns the stored mStructurePolicy pointer, which the constructor's initializer list
    // obtains from DictionaryStructurePolicyFactory::getDictionaryStructurePolicy.
    AK_FORCE_INLINE const DictionaryStructurePolicy *getStructurePolicy() const {
        return mStructurePolicy;
    }

 private:
    DISALLOW_COPY_AND_ASSIGN(BinaryDictionaryInfo);

@@ -86,6 +87,7 @@ class BinaryDictionaryInfo {
    const BinaryDictionaryFormatUtils::FORMAT_VERSION mDictionaryFormat;
    const BinaryDictionaryHeader mDictionaryHeader;
    const uint8_t *const mDictRoot;
    const DictionaryStructurePolicy *const mStructurePolicy;
};
}
#endif /* LATINIME_BINARY_DICTIONARY_INFO_H */
+4 −17
Original line number Diff line number Diff line
@@ -83,27 +83,14 @@ int Dictionary::getBigrams(const int *word, int length, int *inputCodePoints, in
}

// Looks up `word` (of `length` code points) and returns its unigram probability, or
// NOT_A_PROBABILITY when the terminal-node lookup yields NOT_VALID_WORD.
// NOTE(review): this listing is a diff view whose +/- markers were stripped, so pre-change and
// post-change lines are interleaved and the body is not compilable as shown. Judging by the
// "+4 -17" hunk summary, the BinaryFormat-based lines appear to be the removed implementation
// and the structurePolicy-based lines the added one.
int Dictionary::getProbability(const int *word, int length) const {
    const uint8_t *const root = mBinaryDictionaryInfo.getDictRoot();
    int pos = BinaryFormat::getTerminalPosition(root, word, length,
    const DictionaryStructurePolicy *const structurePolicy =
            mBinaryDictionaryInfo.getStructurePolicy();
    int pos = structurePolicy->getTerminalNodePositionOfWord(&mBinaryDictionaryInfo, word, length,
            false /* forceLowerCaseSearch */);
    if (NOT_VALID_WORD == pos) {
        return NOT_A_PROBABILITY;
    }
    // Presumably the removed inline node parsing: it reads the node flags, rejects blacklisted
    // and not-a-word entries, skips the node's character(s), then reads the probability.
    const uint8_t flags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
    if (flags & (BinaryFormat::FLAG_IS_BLACKLISTED | BinaryFormat::FLAG_IS_NOT_A_WORD)) {
        // If this is not a word, or if it's a blacklisted entry, it should behave as
        // having no probability outside of the suggestion process (where it should be used
        // for shortcuts).
        return NOT_A_PROBABILITY;
    }
    const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags));
    if (hasMultipleChars) {
        pos = BinaryFormat::skipOtherCharacters(root, pos);
    } else {
        BinaryFormat::getCodePointAndForwardPointer(root, &pos);
    }
    const int unigramProbability = BinaryFormat::readProbabilityWithoutMovingPointer(root, pos);
    return unigramProbability;
    // Presumably the added replacement: the probability read is delegated to the structure policy.
    return structurePolicy->getUnigramProbability(&mBinaryDictionaryInfo, pos);
}

bool Dictionary::isValidBigram(const int *word0, int length0, const int *word1, int length1) const {
Loading