Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 668870be authored by Keisuke Kuroyanagi's avatar Keisuke Kuroyanagi
Browse files

Move bigrams iteration methods to policy.

To support various bigram formats.

Bug: 6669677
Change-Id: Ifc8c1a855b03cd5a39d97a6e10872ef8ef76475b
parent 7ec9db2c
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -118,8 +118,8 @@ namespace latinime {
        return ProbabilityUtils::backoff(unigramProbability);
    }
    if (multiBigramMap) {
        return multiBigramMap->getBigramProbability(
                binaryDictionaryInfo, prevWordPos, wordPos, unigramProbability);
        return multiBigramMap->getBigramProbability(binaryDictionaryInfo->getStructurePolicy(),
                prevWordPos, wordPos, unigramProbability);
    }
    return ProbabilityUtils::backoff(unigramProbability);
}
+14 −8
Original line number Diff line number Diff line
@@ -112,10 +112,12 @@ int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLeng
    int bigramCount = 0;
    int unigramProbability = 0;
    int bigramBuffer[MAX_WORD_LENGTH];
    BinaryDictionaryBigramsIterator bigramsIt(mBinaryDictionaryInfo, pos);
    const DictionaryStructureWithBufferPolicy *const structurePolicy =
            mBinaryDictionaryInfo->getStructurePolicy();
    BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(), pos);
    while (bigramsIt.hasNext()) {
        bigramsIt.next();
        const int length = mBinaryDictionaryInfo->getStructurePolicy()->
        const int length = structurePolicy->
                getCodePointsAndProbabilityAndReturnCodePointCount(bigramsIt.getBigramPos(),
                        MAX_WORD_LENGTH, bigramBuffer, &unigramProbability);
        // Due to space constraints, the probability for bigrams is approximate - the lower the
@@ -137,10 +139,12 @@ int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLeng
int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const int prevWordLength,
        const bool forceLowerCaseSearch) const {
    if (0 >= prevWordLength) return NOT_A_DICT_POS;
    int pos = mBinaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord(
            prevWord, prevWordLength, forceLowerCaseSearch);
    const DictionaryStructureWithBufferPolicy *const structurePolicy =
            mBinaryDictionaryInfo->getStructurePolicy();
    int pos = structurePolicy->getTerminalNodePositionOfWord(prevWord, prevWordLength,
            forceLowerCaseSearch);
    if (NOT_A_VALID_WORD_POS == pos) return NOT_A_DICT_POS;
    return mBinaryDictionaryInfo->getStructurePolicy()->getBigramsPositionOfNode(pos);
    return structurePolicy->getBigramsPositionOfNode(pos);
}

bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *word1,
@@ -148,11 +152,13 @@ bool BigramDictionary::isValidBigram(const int *word0, int length0, const int *w
    int pos = getBigramListPositionForWord(word0, length0, false /* forceLowerCaseSearch */);
    // getBigramListPositionForWord returns NOT_A_DICT_POS if this word isn't in the dictionary
    // or has no bigrams
    if (NOT_A_DICT_POS == pos) return false;
    int nextWordPos = mBinaryDictionaryInfo->getStructurePolicy()->getTerminalNodePositionOfWord(
            word1, length1, false /* forceLowerCaseSearch */);
    const DictionaryStructureWithBufferPolicy *const structurePolicy =
            mBinaryDictionaryInfo->getStructurePolicy();
    int nextWordPos = structurePolicy->getTerminalNodePositionOfWord(word1, length1,
            false /* forceLowerCaseSearch */);
    if (NOT_A_VALID_WORD_POS == nextWordPos) return false;

    BinaryDictionaryBigramsIterator bigramsIt(mBinaryDictionaryInfo, pos);
    BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(), pos);
    while (bigramsIt.hasNext()) {
        bigramsIt.next();
        if (bigramsIt.getBigramPos() == nextWordPos) {
+9 −19
Original line number Diff line number Diff line
@@ -18,51 +18,41 @@
#define LATINIME_BINARY_DICTIONARY_BIGRAMS_ITERATOR_H

#include "defines.h"
#include "suggest/core/dictionary/binary_dictionary_info.h"
#include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h"
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"

namespace latinime {

class BinaryDictionaryBigramsIterator {
 public:
    BinaryDictionaryBigramsIterator(
            const BinaryDictionaryInfo *const binaryDictionaryInfo, const int pos)
            : mBinaryDictionaryInfo(binaryDictionaryInfo), mPos(pos), mBigramFlags(0),
              mBigramPos(NOT_A_DICT_POS), mHasNext(pos != NOT_A_DICT_POS) {}
            const DictionaryBigramsStructurePolicy *const bigramsStructurePolicy, const int pos)
            : mBigramsStructurePolicy(bigramsStructurePolicy), mPos(pos),
              mBigramPos(NOT_A_DICT_POS), mProbability(NOT_A_PROBABILITY),
              mHasNext(pos != NOT_A_DICT_POS) {}

    AK_FORCE_INLINE bool hasNext() const {
        return mHasNext;
    }

    AK_FORCE_INLINE void next() {
        mBigramFlags = BinaryDictionaryTerminalAttributesReadingUtils::getFlagsAndForwardPointer(
                mBinaryDictionaryInfo, &mPos);
        mBigramPos =
                BinaryDictionaryTerminalAttributesReadingUtils::getBigramAddressAndForwardPointer(
                        mBinaryDictionaryInfo, mBigramFlags, &mPos);
        mHasNext = BinaryDictionaryTerminalAttributesReadingUtils::hasNext(mBigramFlags);
        mBigramsStructurePolicy->getNextBigram(&mBigramPos, &mProbability, &mHasNext, &mPos);
    }

    AK_FORCE_INLINE int getProbability() const {
        return BinaryDictionaryTerminalAttributesReadingUtils::getProbabilityFromFlags(
                mBigramFlags);
        return mProbability;
    }

    AK_FORCE_INLINE int getBigramPos() const {
        return mBigramPos;
    }

    AK_FORCE_INLINE int getFlags() const {
        return mBigramFlags;
    }

 private:
    DISALLOW_COPY_AND_ASSIGN(BinaryDictionaryBigramsIterator);

    const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
    const DictionaryBigramsStructurePolicy *const mBigramsStructurePolicy;
    int mPos;
    BinaryDictionaryTerminalAttributesReadingUtils::BigramFlags mBigramFlags;
    int mBigramPos;
    int mProbability;
    bool mHasNext;
};
} // namespace latinime
+4 −8
Original line number Diff line number Diff line
@@ -16,7 +16,6 @@

#include "suggest/core/dictionary/binary_dictionary_terminal_attributes_reading_utils.h"

#include "suggest/core/dictionary/binary_dictionary_info.h"
#include "suggest/core/dictionary/byte_array_utils.h"

namespace latinime {
@@ -38,22 +37,19 @@ const int TaUtils::SHORTCUT_LIST_SIZE_FIELD_SIZE = 2;
const int TaUtils::WHITELIST_SHORTCUT_PROBABILITY = 15;

/* static */ int TaUtils::getBigramAddressAndForwardPointer(
        const BinaryDictionaryInfo *const binaryDictionaryInfo, const TerminalAttributeFlags flags,
        const uint8_t *const dictRoot, const TerminalAttributeFlags flags,
        int *const pos) {
    int offset = 0;
    const int origin = *pos;
    switch (MASK_ATTRIBUTE_ADDRESS_TYPE & flags) {
        case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE:
            offset = ByteArrayUtils::readUint8AndAdvancePosition(
                    binaryDictionaryInfo->getDictRoot(), pos);
            offset = ByteArrayUtils::readUint8AndAdvancePosition(dictRoot, pos);
            break;
        case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES:
            offset = ByteArrayUtils::readUint16AndAdvancePosition(
                    binaryDictionaryInfo->getDictRoot(), pos);
            offset = ByteArrayUtils::readUint16AndAdvancePosition(dictRoot, pos);
            break;
        case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES:
            offset = ByteArrayUtils::readUint24AndAdvancePosition(
                    binaryDictionaryInfo->getDictRoot(), pos);
            offset = ByteArrayUtils::readUint24AndAdvancePosition(dictRoot, pos);
            break;
    }
    if (isOffsetNegative(flags)) {
+6 −8
Original line number Diff line number Diff line
@@ -32,9 +32,8 @@ class BinaryDictionaryTerminalAttributesReadingUtils {
    typedef TerminalAttributeFlags ShortcutFlags;

    static AK_FORCE_INLINE TerminalAttributeFlags getFlagsAndForwardPointer(
            const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) {
        return ByteArrayUtils::readUint8AndAdvancePosition(
                binaryDictionaryInfo->getDictRoot(), pos);
            const uint8_t *const dictRoot, int *const pos) {
        return ByteArrayUtils::readUint8AndAdvancePosition(dictRoot, pos);
    }

    static AK_FORCE_INLINE int getProbabilityFromFlags(const TerminalAttributeFlags flags) {
@@ -47,18 +46,17 @@ class BinaryDictionaryTerminalAttributesReadingUtils {

    // Bigrams reading methods
    static AK_FORCE_INLINE void skipExistingBigrams(
            const BinaryDictionaryInfo *const binaryDictionaryInfo, int *const pos) {
        BigramFlags flags = getFlagsAndForwardPointer(binaryDictionaryInfo, pos);
            const uint8_t *const dictRoot, int *const pos) {
        BigramFlags flags = getFlagsAndForwardPointer(dictRoot, pos);
        while (hasNext(flags)) {
            *pos += attributeAddressSize(flags);
            flags = getFlagsAndForwardPointer(binaryDictionaryInfo, pos);
            flags = getFlagsAndForwardPointer(dictRoot, pos);
        }
        *pos += attributeAddressSize(flags);
    }

    static int getBigramAddressAndForwardPointer(
            const BinaryDictionaryInfo *const binaryDictionaryInfo, const BigramFlags flags,
                    int *const pos);
            const uint8_t *const dictRoot, const BigramFlags flags, int *const pos);

    // Shortcuts reading methods
    // This method returns the size of the shortcut list region excluding the shortcut list size
Loading