Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 4f191935 authored by Keisuke Kuroynagi's avatar Keisuke Kuroynagi Committed by Android (Google) Code Review
Browse files

Merge "Introduce BinaryDictionaryBigramsIterator to access bigrams attributes...

Merge "Introduce BinaryDictionaryBigramsIterator to access bigrams attributes in binary dictionaries."
parents 78b881bf a71ed8ca
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -53,6 +53,7 @@ LATIN_IME_CORE_SRC_FILES := \
        dic_nodes_cache.cpp) \
    $(addprefix suggest/core/dictionary/, \
        bigram_dictionary.cpp \
        binary_dictionary_bigrams_reading_utils.cpp \
        binary_dictionary_format_utils.cpp \
        binary_dictionary_header.cpp \
        binary_dictionary_header_reading_utils.cpp \
+1 −2
Original line number Diff line number Diff line
@@ -233,8 +233,7 @@ namespace latinime {
        return multiBigramMap->getBigramProbability(
                binaryDictionaryInfo, prevWordPos, wordPos, unigramProbability);
    }
    return BinaryFormat::getBigramProbability(
            binaryDictionaryInfo->getDictRoot(), prevWordPos, wordPos, unigramProbability);
    return ProbabilityUtils::backoff(unigramProbability);
}

///////////////////////////////////////
+21 −24
Original line number Diff line number Diff line
@@ -21,6 +21,7 @@
#include "bigram_dictionary.h"

#include "defines.h"
#include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
#include "suggest/core/dictionary/binary_dictionary_info.h"
#include "suggest/core/dictionary/binary_format.h"
#include "suggest/core/dictionary/dictionary.h"
@@ -100,12 +101,11 @@ void BigramDictionary::addWordBigram(int *word, int length, int probability, int
 * and the bigrams are used to boost unigram result scores, it makes little sense to
 * reduce their scope to the ones that match the first letter.
 */
int BigramDictionary::getBigrams(const int *prevWord, int prevWordLength, int *inputCodePoints,
int BigramDictionary::getPredictions(const int *prevWord, int prevWordLength, int *inputCodePoints,
        int inputSize, int *bigramCodePoints, int *bigramProbability, int *outputTypes) const {
    // TODO: remove unused arguments, and refrain from storing stuff in members of this class
    // TODO: have "in" arguments before "out" ones, and make out args explicit in the name

    const uint8_t *const root = mBinaryDictionaryInfo->getDictRoot();
    int pos = getBigramListPositionForWord(prevWord, prevWordLength,
            false /* forceLowerCaseSearch */);
    // getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams
@@ -116,21 +116,20 @@ int BigramDictionary::getBigrams(const int *prevWord, int prevWordLength, int *i
    }
    // If still no bigrams, we really don't have them!
    if (0 == pos) return 0;
    uint8_t bigramFlags;

    int bigramCount = 0;
    do {
        bigramFlags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
        int bigramBuffer[MAX_WORD_LENGTH];
    int unigramProbability = 0;
        const int bigramPos = BinaryFormat::getAttributeAddressAndForwardPointer(root, bigramFlags,
                &pos);
        const int length = BinaryFormat::getWordAtAddress(root, bigramPos, MAX_WORD_LENGTH,
                bigramBuffer, &unigramProbability);
    int bigramBuffer[MAX_WORD_LENGTH];
    for (BinaryDictionaryBigramsIterator bigramsIt(mBinaryDictionaryInfo, pos);
            bigramsIt.hasNext(); /* no-op */) {
        bigramsIt.next();
        const int length = BinaryFormat::getWordAtAddress(
                mBinaryDictionaryInfo->getDictRoot(), bigramsIt.getBigramPos(),
                MAX_WORD_LENGTH, bigramBuffer, &unigramProbability);

        // inputSize == 0 means we are trying to find bigram predictions.
        if (inputSize < 1 || checkFirstCharacter(bigramBuffer, inputCodePoints)) {
            const int bigramProbabilityTemp =
                    BinaryFormat::MASK_ATTRIBUTE_PROBABILITY & bigramFlags;
            const int bigramProbabilityTemp = bigramsIt.getProbability();
            // Due to space constraints, the probability for bigrams is approximate - the lower the
            // unigram probability, the worse the precision. The theoretical maximum error in
            // resulting probability is 8 - although in practice it's never bigger than 3 or 4
@@ -142,7 +141,7 @@ int BigramDictionary::getBigrams(const int *prevWord, int prevWordLength, int *i
                    outputTypes);
            ++bigramCount;
        }
    } while (BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags);
    }
    return min(bigramCount, MAX_RESULTS);
}

@@ -187,22 +186,20 @@ bool BigramDictionary::checkFirstCharacter(int *word, int *inputCodePoints) cons

bool BigramDictionary::isValidBigram(const int *word1, int length1, const int *word2,
        int length2) const {
    const uint8_t *const root = mBinaryDictionaryInfo->getDictRoot();
    int pos = getBigramListPositionForWord(word1, length1, false /* forceLowerCaseSearch */);
    // getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams
    if (0 == pos) return false;
    int nextWordPos = BinaryFormat::getTerminalPosition(root, word2, length2,
            false /* forceLowerCaseSearch */);
    int nextWordPos = BinaryFormat::getTerminalPosition(mBinaryDictionaryInfo->getDictRoot(),
            word2, length2, false /* forceLowerCaseSearch */);
    if (NOT_VALID_WORD == nextWordPos) return false;
    uint8_t bigramFlags;
    do {
        bigramFlags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
        const int bigramPos = BinaryFormat::getAttributeAddressAndForwardPointer(root, bigramFlags,
                &pos);
        if (bigramPos == nextWordPos) {

    for (BinaryDictionaryBigramsIterator bigramsIt(mBinaryDictionaryInfo, pos);
            bigramsIt.hasNext(); /* no-op */) {
        bigramsIt.next();
        if (bigramsIt.getBigramPos() == nextWordPos) {
            return true;
        }
    } while (BinaryFormat::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags);
    }
    return false;
}

+2 −2
Original line number Diff line number Diff line
@@ -27,8 +27,8 @@ class BigramDictionary {
 public:
    BigramDictionary(const BinaryDictionaryInfo *const binaryDictionaryInfo);

    int getBigrams(const int *word, int length, int *inputCodePoints, int inputSize, int *outWords,
            int *frequencies, int *outputTypes) const;
    int getPredictions(const int *word, int length, int *inputCodePoints, int inputSize,
            int *outWords, int *frequencies, int *outputTypes) const;
    bool isValidBigram(const int *word1, int length1, const int *word2, int length2) const;
    ~BigramDictionary();

+67 −0
Original line number Diff line number Diff line
/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef LATINIME_BINARY_DICTIONARY_BIGRAMS_ITERATOR_H
#define LATINIME_BINARY_DICTIONARY_BIGRAMS_ITERATOR_H

#include "defines.h"
#include "suggest/core/dictionary/binary_dictionary_bigrams_reading_utils.h"
#include "suggest/core/dictionary/binary_dictionary_info.h"

namespace latinime {

// Forward-only cursor over the bigram entries that start at a given position of a binary
// dictionary. Every read is delegated to BinaryDictionaryBigramsReadingUtils; this class only
// remembers the read position and the values obtained for the most recently read entry.
//
// Expected call pattern:
//     for (BinaryDictionaryBigramsIterator it(info, pos); it.hasNext(); /* no-op */) {
//         it.next();
//         ... it.getBigramPos() / it.getProbability() / it.getFlags() ...
//     }
//
// hasNext() is true right after construction, so the first next() is always performed; the
// iterator itself does not validate the starting position.
class BinaryDictionaryBigramsIterator {
 public:
    // binaryDictionaryInfo: dictionary handed to the reading utils on every read (not owned).
    // pos: position at which the first entry is read by next().
    BinaryDictionaryBigramsIterator(
            const BinaryDictionaryInfo *const binaryDictionaryInfo, const int pos)
            : mBinaryDictionaryInfo(binaryDictionaryInfo),
              mPos(pos),
              mBigramFlags(0),
              mBigramPos(0),
              mHasNext(true) {}

    // True until next() has read an entry whose flags report that nothing follows it.
    AK_FORCE_INLINE bool hasNext() const {
        return mHasNext;
    }

    // Reads one entry at the current position: first its flags, then the bigram address derived
    // from those flags. mPos is passed by pointer to both reads so they can move it forward.
    // Finally records whether the flags announce a following entry.
    AK_FORCE_INLINE void next() {
        const BinaryDictionaryBigramsReadingUtils::BigramFlags currentFlags =
                BinaryDictionaryBigramsReadingUtils::getFlagsAndForwardPointer(
                        mBinaryDictionaryInfo, &mPos);
        mBigramFlags = currentFlags;
        mBigramPos = BinaryDictionaryBigramsReadingUtils::getBigramAddressAndForwardPointer(
                mBinaryDictionaryInfo, currentFlags, &mPos);
        mHasNext = BinaryDictionaryBigramsReadingUtils::hasNext(currentFlags);
    }

    // Probability extracted by the reading utils from the flags of the last entry read.
    AK_FORCE_INLINE int getProbability() const {
        return BinaryDictionaryBigramsReadingUtils::getBigramProbability(mBigramFlags);
    }

    // Bigram address obtained by the last next() call (0 before the first call).
    AK_FORCE_INLINE int getBigramPos() const {
        return mBigramPos;
    }

    // Flags of the last entry read (0 before the first call to next()).
    AK_FORCE_INLINE int getFlags() const {
        return mBigramFlags;
    }

 private:
    DISALLOW_COPY_AND_ASSIGN(BinaryDictionaryBigramsIterator);

    // Dictionary being read; passed through to the reading utils.
    const BinaryDictionaryInfo *const mBinaryDictionaryInfo;
    // Current read position; updated through the pointer given to the reading utils.
    int mPos;
    // Flags of the most recently read entry.
    BinaryDictionaryBigramsReadingUtils::BigramFlags mBigramFlags;
    // Bigram address of the most recently read entry.
    int mBigramPos;
    // Whether another entry is expected after the most recently read one.
    bool mHasNext;
};
} // namespace latinime
#endif // LATINIME_BINARY_DICTIONARY_BIGRAMS_ITERATOR_H
Loading