Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 202e416b authored by Keisuke Kuroyanagi's avatar Keisuke Kuroyanagi
Browse files

Add bigram writing methods.

Bug: 6669677

Change-Id: Ib62f6c31c4bcbb41ac200dccccb13e366b3c2173
parent 64621cf4
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -68,6 +68,7 @@ LATIN_IME_CORE_SRC_FILES := \
    suggest/core/session/dic_traverse_session.cpp \
    $(addprefix suggest/policyimpl/dictionary/, \
        bigram/bigram_list_reading_utils.cpp \
        bigram/dynamic_bigram_list_policy.cpp \
        header/header_policy.cpp \
        header/header_reading_utils.cpp \
        shortcut/shortcut_list_reading_utils.cpp \
+9 −3
Original line number Diff line number Diff line
@@ -117,9 +117,15 @@ int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLeng
            mDictionaryStructurePolicy->getBigramsStructurePolicy(), pos);
    while (bigramsIt.hasNext()) {
        bigramsIt.next();
        const int length = mDictionaryStructurePolicy->
        if (bigramsIt.getBigramPos() == NOT_A_VALID_WORD_POS) {
            continue;
        }
        const int codePointCount = mDictionaryStructurePolicy->
                getCodePointsAndProbabilityAndReturnCodePointCount(bigramsIt.getBigramPos(),
                        MAX_WORD_LENGTH, bigramBuffer, &unigramProbability);
        if (codePointCount <= 0) {
            continue;
        }
        // Due to space constraints, the probability for bigrams is approximate - the lower the
        // unigram probability, the worse the precision. The theoretical maximum error in
        // resulting probability is 8 - although in practice it's never bigger than 3 or 4
@@ -127,8 +133,8 @@ int BigramDictionary::getPredictions(const int *prevWord, const int prevWordLeng
        // here, but it can't get too bad.
        const int probability = ProbabilityUtils::computeProbabilityForBigram(
                unigramProbability, bigramsIt.getProbability());
        addWordBigram(bigramBuffer, length, probability, outBigramProbability, outBigramCodePoints,
                outputTypes);
        addWordBigram(bigramBuffer, codePointCount, probability, outBigramProbability,
                outBigramCodePoints, outputTypes);
        ++bigramCount;
    }
    return min(bigramCount, MAX_RESULTS);
+3 −0
Original line number Diff line number Diff line
@@ -73,6 +73,9 @@ class MultiBigramMap {
                    bigramsListPos);
            while (bigramsIt.hasNext()) {
                bigramsIt.next();
                if (bigramsIt.getBigramPos() == NOT_A_VALID_WORD_POS) {
                    continue;
                }
                mBigramMap[bigramsIt.getBigramPos()] = bigramsIt.getProbability();
                mBloomFilter.setInFilter(bigramsIt.getBigramPos());
            }
+6 −6
Original line number Diff line number Diff line
@@ -33,16 +33,16 @@ class BigramListPolicy : public DictionaryBigramsStructurePolicy {

    void getNextBigram(int *const outBigramPos, int *const outProbability, bool *const outHasNext,
            int *const pos) const {
        const BigramListReadingUtils::BigramFlags flags =
                BigramListReadingUtils::getFlagsAndForwardPointer(mBigramsBuf, pos);
        *outBigramPos = BigramListReadingUtils::getBigramAddressAndForwardPointer(
        const BigramListReadWriteUtils::BigramFlags flags =
                BigramListReadWriteUtils::getFlagsAndForwardPointer(mBigramsBuf, pos);
        *outBigramPos = BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
                        mBigramsBuf, flags, pos);
        *outProbability = BigramListReadingUtils::getProbabilityFromFlags(flags);
        *outHasNext = BigramListReadingUtils::hasNext(flags);
        *outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(flags);
        *outHasNext = BigramListReadWriteUtils::hasNext(flags);
    }

    void skipAllBigrams(int *const pos) const {
        BigramListReadingUtils::skipExistingBigrams(mBigramsBuf, pos);
        BigramListReadWriteUtils::skipExistingBigrams(mBigramsBuf, pos);
    }

 private:
+18 −14
Original line number Diff line number Diff line
@@ -20,24 +20,25 @@

namespace latinime {

const BigramListReadingUtils::BigramFlags BigramListReadingUtils::MASK_ATTRIBUTE_ADDRESS_TYPE =
const BigramListReadWriteUtils::BigramFlags BigramListReadWriteUtils::MASK_ATTRIBUTE_ADDRESS_TYPE =
        0x30;
const BigramListReadingUtils::BigramFlags
        BigramListReadingUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE = 0x10;
const BigramListReadingUtils::BigramFlags
        BigramListReadingUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20;
const BigramListReadingUtils::BigramFlags
        BigramListReadingUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30;
const BigramListReadingUtils::BigramFlags
        BigramListReadingUtils::FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40;
const BigramListReadWriteUtils::BigramFlags
        BigramListReadWriteUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE = 0x10;
const BigramListReadWriteUtils::BigramFlags
        BigramListReadWriteUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20;
const BigramListReadWriteUtils::BigramFlags
        BigramListReadWriteUtils::FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30;
const BigramListReadWriteUtils::BigramFlags
        BigramListReadWriteUtils::FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40;
// Flag for presence of more attributes
const BigramListReadingUtils::BigramFlags BigramListReadingUtils::FLAG_ATTRIBUTE_HAS_NEXT = 0x80;
const BigramListReadWriteUtils::BigramFlags BigramListReadWriteUtils::FLAG_ATTRIBUTE_HAS_NEXT =
        0x80;
// Mask for attribute probability, stored in the low 4 bits of the flags byte.
const BigramListReadingUtils::BigramFlags
        BigramListReadingUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F;
const int BigramListReadingUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
const BigramListReadWriteUtils::BigramFlags
        BigramListReadWriteUtils::MASK_ATTRIBUTE_PROBABILITY = 0x0F;
const int BigramListReadWriteUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;

/* static */ int BigramListReadingUtils::getBigramAddressAndForwardPointer(
/* static */ int BigramListReadWriteUtils::getBigramAddressAndForwardPointer(
        const uint8_t *const bigramsBuf, const BigramFlags flags, int *const pos) {
    int offset = 0;
    const int origin = *pos;
@@ -52,6 +53,9 @@ const int BigramListReadingUtils::ATTRIBUTE_ADDRESS_SHIFT = 4;
            offset = ByteArrayUtils::readUint24AndAdvancePosition(bigramsBuf, pos);
            break;
    }
    if (offset == 0) {
        return NOT_A_VALID_WORD_POS;
    }
    if (isOffsetNegative(flags)) {
        return origin - offset;
    } else {
Loading