Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 6ae4d79d authored by Keisuke Kuroyanagi's avatar Keisuke Kuroyanagi
Browse files

Use IntArrayView for dictionary structure policy.

Change-Id: I0dc94908259d70d5085ff22abf422d90affb1452
parent b85bf4eb
Loading
Loading
Loading
Loading
+4 −2
Original line number Diff line number Diff line
@@ -32,6 +32,7 @@
#include "suggest/core/suggest_options.h"
#include "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h"
#include "utils/char_utils.h"
#include "utils/int_array_view.h"
#include "utils/jni_data_utils.h"
#include "utils/log_utils.h"
#include "utils/time_keeper.h"
@@ -581,8 +582,9 @@ static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, j
                return false;
            }
        }
        if (!dictionaryStructureWithBufferPolicy->addUnigramEntry(wordCodePoints,
                wordCodePointCount, wordProperty.getUnigramProperty())) {
        if (!dictionaryStructureWithBufferPolicy->addUnigramEntry(
                CodePointArrayView(wordCodePoints, wordCodePointCount),
                wordProperty.getUnigramProperty())) {
            LogUtils::logToJava(env, "Cannot add unigram to the new dict.");
            return false;
        }
+10 −6
Original line number Diff line number Diff line
@@ -28,6 +28,7 @@
#include "suggest/core/suggest_options.h"
#include "suggest/policyimpl/gesture/gesture_suggest_policy_factory.h"
#include "suggest/policyimpl/typing/typing_suggest_policy_factory.h"
#include "utils/int_array_view.h"
#include "utils/log_utils.h"
#include "utils/time_keeper.h"

@@ -112,8 +113,8 @@ int Dictionary::getMaxProbabilityOfExactMatches(const int *word, int length) con
int Dictionary::getNgramProbability(const PrevWordsInfo *const prevWordsInfo, const int *word,
        int length) const {
    TimeKeeper::setCurrentTime();
    int nextWordPos = mDictionaryStructureWithBufferPolicy->getTerminalPtNodePositionOfWord(word,
            length, false /* forceLowerCaseSearch */);
    int nextWordPos = mDictionaryStructureWithBufferPolicy->getTerminalPtNodePositionOfWord(
            CodePointArrayView(word, length), false /* forceLowerCaseSearch */);
    if (NOT_A_DICT_POS == nextWordPos) return NOT_A_PROBABILITY;
    if (!prevWordsInfo) {
        return getDictionaryStructurePolicy()->getProbabilityOfPtNode(
@@ -135,12 +136,14 @@ bool Dictionary::addUnigramEntry(const int *const word, const int length,
        return false;
    }
    TimeKeeper::setCurrentTime();
    return mDictionaryStructureWithBufferPolicy->addUnigramEntry(word, length, unigramProperty);
    return mDictionaryStructureWithBufferPolicy->addUnigramEntry(CodePointArrayView(word, length),
            unigramProperty);
}

bool Dictionary::removeUnigramEntry(const int *const codePoints, const int codePointCount) {
    TimeKeeper::setCurrentTime();
    return mDictionaryStructureWithBufferPolicy->removeUnigramEntry(codePoints, codePointCount);
    return mDictionaryStructureWithBufferPolicy->removeUnigramEntry(
            CodePointArrayView(codePoints, codePointCount));
}

bool Dictionary::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
@@ -152,7 +155,8 @@ bool Dictionary::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
bool Dictionary::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
        const int *const word, const int length) {
    TimeKeeper::setCurrentTime();
    return mDictionaryStructureWithBufferPolicy->removeNgramEntry(prevWordsInfo, word, length);
    return mDictionaryStructureWithBufferPolicy->removeNgramEntry(prevWordsInfo,
            CodePointArrayView(word, length));
}

bool Dictionary::flush(const char *const filePath) {
@@ -181,7 +185,7 @@ const WordProperty Dictionary::getWordProperty(const int *const codePoints,
        const int codePointCount) {
    TimeKeeper::setCurrentTime();
    return mDictionaryStructureWithBufferPolicy->getWordProperty(
            codePoints, codePointCount);
            CodePointArrayView(codePoints, codePointCount));
}

int Dictionary::getNextWordAndNextToken(const int token, int *const outCodePoints,
+11 −12
Original line number Diff line number Diff line
@@ -21,6 +21,7 @@

#include "defines.h"
#include "suggest/core/dictionary/property/word_property.h"
#include "utils/int_array_view.h"

namespace latinime {

@@ -49,33 +50,32 @@ class DictionaryStructureWithBufferPolicy {
            DicNodeVector *const childDicNodes) const = 0;

    virtual int getCodePointsAndProbabilityAndReturnCodePointCount(
            const int nodePos, const int maxCodePointCount, int *const outCodePoints,
            const int ptNodePos, const int maxCodePointCount, int *const outCodePoints,
            int *const outUnigramProbability) const = 0;

    virtual int getTerminalPtNodePositionOfWord(const int *const inWord,
            const int length, const bool forceLowerCaseSearch) const = 0;
    virtual int getTerminalPtNodePositionOfWord(const CodePointArrayView wordCodePoints,
            const bool forceLowerCaseSearch) const = 0;

    virtual int getProbability(const int unigramProbability,
            const int bigramProbability) const = 0;
    virtual int getProbability(const int unigramProbability, const int bigramProbability) const = 0;

    virtual int getProbabilityOfPtNode(const int *const prevWordsPtNodePos,
            const int nodePos) const = 0;
            const int ptNodePos) const = 0;

    virtual void iterateNgramEntries(const int *const prevWordsPtNodePos,
            NgramListener *const listener) const = 0;

    virtual int getShortcutPositionOfPtNode(const int nodePos) const = 0;
    virtual int getShortcutPositionOfPtNode(const int ptNodePos) const = 0;

    virtual const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const = 0;

    virtual const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const = 0;

    // Returns whether the update was successful or not.
    virtual bool addUnigramEntry(const int *const word, const int length,
    virtual bool addUnigramEntry(const CodePointArrayView wordCodePoints,
            const UnigramProperty *const unigramProperty) = 0;

    // Returns whether the update was successful or not.
    virtual bool removeUnigramEntry(const int *const word, const int length) = 0;
    virtual bool removeUnigramEntry(const CodePointArrayView wordCodePoints) = 0;

    // Returns whether the update was successful or not.
    virtual bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
@@ -83,7 +83,7 @@ class DictionaryStructureWithBufferPolicy {

    // Returns whether the update was successful or not.
    virtual bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
            const int *const word, const int length) = 0;
            const CodePointArrayView wordCodePoints) = 0;

    // Returns whether the flush was successful or not.
    virtual bool flush(const char *const filePath) = 0;
@@ -99,8 +99,7 @@ class DictionaryStructureWithBufferPolicy {
            const int maxResultLength) = 0;

    // Used for testing.
    virtual const WordProperty getWordProperty(const int *const codePonts,
            const int codePointCount) const = 0;
    virtual const WordProperty getWordProperty(const CodePointArrayView wordCodePoints) const = 0;

    // Method to iterate all words in the dictionary.
    // The returned token has to be used to get the next word. If token is 0, this method newly
+7 −13
Original line number Diff line number Diff line
@@ -21,6 +21,7 @@
#include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
#include "utils/char_utils.h"
#include "utils/int_array_view.h"

namespace latinime {

@@ -91,19 +92,11 @@ class PrevWordsInfo {
    }

    // n is 1-indexed.
    const int *getNthPrevWordCodePoints(const int n) const {
    const CodePointArrayView getNthPrevWordCodePoints(const int n) const {
        if (n <= 0 || n > MAX_PREV_WORD_COUNT_FOR_N_GRAM) {
            return nullptr;
            return CodePointArrayView();
        }
        return mPrevWordCodePoints[n - 1];
    }

    // n is 1-indexed.
    int getNthPrevWordCodePointCount(const int n) const {
        if (n <= 0 || n > MAX_PREV_WORD_COUNT_FOR_N_GRAM) {
            return 0;
        }
        return mPrevWordCodePointCount[n - 1];
        return CodePointArrayView(mPrevWordCodePoints[n - 1], mPrevWordCodePointCount[n - 1]);
    }

    // n is 1-indexed.
@@ -134,8 +127,9 @@ class PrevWordsInfo {
                return NOT_A_DICT_POS;
            }
        }
        const CodePointArrayView codePointArrayView(codePoints, codePointCount);
        const int wordPtNodePos = dictStructurePolicy->getTerminalPtNodePositionOfWord(
                codePoints, codePointCount, false /* forceLowerCaseSearch */);
                codePointArrayView, false /* forceLowerCaseSearch */);
        if (wordPtNodePos != NOT_A_DICT_POS || !tryLowerCaseSearch) {
            // Return the position when the word was found or when lower case search
            // should not be tried.
@@ -144,7 +138,7 @@ class PrevWordsInfo {
        // Check bigrams for lower-cased previous word if original was not found. Useful for
        // auto-capitalized words like "The [current_word]".
        return dictStructurePolicy->getTerminalPtNodePositionOfWord(
                codePoints, codePointCount, true /* forceLowerCaseSearch */);
                codePointArrayView, true /* forceLowerCaseSearch */);
    }

    void clear() {
+26 −24
Original line number Diff line number Diff line
@@ -104,12 +104,12 @@ int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
    return codePointCount;
}

int Ver4PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
        const int length, const bool forceLowerCaseSearch) const {
int Ver4PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const CodePointArrayView wordCodePoints,
        const bool forceLowerCaseSearch) const {
    DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
    readingHelper.initWithPtNodeArrayPos(getRootPosition());
    const int ptNodePos =
            readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch);
    const int ptNodePos = readingHelper.getTerminalPtNodePositionOfWord(wordCodePoints.data(),
            wordCodePoints.size(), forceLowerCaseSearch);
    if (readingHelper.isError()) {
        mIsCorrupted = true;
        AKLOGE("Dictionary reading error in createAndGetAllChildDicNodes().");
@@ -194,7 +194,7 @@ int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) cons
            ptNodeParams.getTerminalId());
}

bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int length,
bool Ver4PatriciaTriePolicy::addUnigramEntry(const CodePointArrayView wordCodePoints,
        const UnigramProperty *const unigramProperty) {
    if (!mBuffers->isUpdatable()) {
        AKLOGI("Warning: addUnigramEntry() is called for non-updatable dictionary.");
@@ -205,8 +205,9 @@ bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int le
                mDictBuffer->getTailPosition());
        return false;
    }
    if (length > MAX_WORD_LENGTH) {
        AKLOGE("The word is too long to insert to the dictionary, length: %d", length);
    if (wordCodePoints.size() > MAX_WORD_LENGTH) {
        AKLOGE("The word is too long to insert to the dictionary, length: %zd",
                wordCodePoints.size());
        return false;
    }
    for (const auto &shortcut : unigramProperty->getShortcuts()) {
@@ -220,8 +221,8 @@ bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int le
    readingHelper.initWithPtNodeArrayPos(getRootPosition());
    bool addedNewUnigram = false;
    int codePointsToAdd[MAX_WORD_LENGTH];
    int codePointCountToAdd = length;
    memmove(codePointsToAdd, word, sizeof(int) * length);
    int codePointCountToAdd = wordCodePoints.size();
    memmove(codePointsToAdd, wordCodePoints.data(), sizeof(int) * codePointCountToAdd);
    if (unigramProperty->representsBeginningOfSentence()) {
        codePointCountToAdd = CharUtils::attachBeginningOfSentenceMarker(codePointsToAdd,
                codePointCountToAdd, MAX_WORD_LENGTH);
@@ -229,14 +230,15 @@ bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int le
    if (codePointCountToAdd <= 0) {
        return false;
    }
    if (mUpdatingHelper.addUnigramWord(&readingHelper, codePointsToAdd, codePointCountToAdd,
            unigramProperty, &addedNewUnigram)) {
    const CodePointArrayView codePointArrayView(codePointsToAdd, codePointCountToAdd);
    if (mUpdatingHelper.addUnigramWord(&readingHelper, codePointArrayView.data(),
            codePointArrayView.size(), unigramProperty, &addedNewUnigram)) {
        if (addedNewUnigram && !unigramProperty->representsBeginningOfSentence()) {
            mUnigramCount++;
        }
        if (unigramProperty->getShortcuts().size() > 0) {
            // Add shortcut target.
            const int wordPos = getTerminalPtNodePositionOfWord(word, length,
            const int wordPos = getTerminalPtNodePositionOfWord(codePointArrayView,
                    false /* forceLowerCaseSearch */);
            if (wordPos == NOT_A_DICT_POS) {
                AKLOGE("Cannot find terminal PtNode position to add shortcut target.");
@@ -259,12 +261,12 @@ bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int le
    }
}

bool Ver4PatriciaTriePolicy::removeUnigramEntry(const int *const word, const int length) {
bool Ver4PatriciaTriePolicy::removeUnigramEntry(const CodePointArrayView wordCodePoints) {
    if (!mBuffers->isUpdatable()) {
        AKLOGI("Warning: removeUnigramEntry() is called for non-updatable dictionary.");
        return false;
    }
    const int ptNodePos = getTerminalPtNodePositionOfWord(word, length,
    const int ptNodePos = getTerminalPtNodePositionOfWord(wordCodePoints,
            false /* forceLowerCaseSearch */);
    if (ptNodePos == NOT_A_DICT_POS) {
        return false;
@@ -305,7 +307,6 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
                    false /* isBlacklisted */, MAX_PROBABILITY /* probability */,
                    NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts);
            if (!addUnigramEntry(prevWordsInfo->getNthPrevWordCodePoints(1 /* n */),
                    prevWordsInfo->getNthPrevWordCodePointCount(1 /* n */),
                    &beginningOfSentenceUnigramProperty)) {
                AKLOGE("Cannot add unigram entry for the beginning-of-sentence.");
                return false;
@@ -318,8 +319,8 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
        }
    }
    const int word1Pos = getTerminalPtNodePositionOfWord(
            bigramProperty->getTargetCodePoints()->data(),
            bigramProperty->getTargetCodePoints()->size(), false /* forceLowerCaseSearch */);
            CodePointArrayView(*bigramProperty->getTargetCodePoints()),
            false /* forceLowerCaseSearch */);
    if (word1Pos == NOT_A_DICT_POS) {
        return false;
    }
@@ -336,7 +337,7 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
}

bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
        const int *const word, const int length) {
        const CodePointArrayView wordCodePoints) {
    if (!mBuffers->isUpdatable()) {
        AKLOGI("Warning: removeNgramEntry() is called for non-updatable dictionary.");
        return false;
@@ -350,8 +351,9 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
        AKLOGE("prev words info is not valid for removing n-gram entry form the dictionary.");
        return false;
    }
    if (length > MAX_WORD_LENGTH) {
        AKLOGE("word is too long to remove n-gram entry form the dictionary. length: %d", length);
    if (wordCodePoints.size() > MAX_WORD_LENGTH) {
        AKLOGE("word is too long to remove n-gram entry form the dictionary. length: %zd",
                wordCodePoints.size());
    }
    int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
    prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos,
@@ -360,7 +362,7 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
    if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) {
        return false;
    }
    const int wordPos = getTerminalPtNodePositionOfWord(word, length,
    const int wordPos = getTerminalPtNodePositionOfWord(wordCodePoints,
            false /* forceLowerCaseSearch */);
    if (wordPos == NOT_A_DICT_POS) {
        return false;
@@ -445,9 +447,9 @@ void Ver4PatriciaTriePolicy::getProperty(const char *const query, const int quer
    }
}

const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const codePoints,
        const int codePointCount) const {
    const int ptNodePos = getTerminalPtNodePositionOfWord(codePoints, codePointCount,
const WordProperty Ver4PatriciaTriePolicy::getWordProperty(
        const CodePointArrayView wordCodePoints) const {
    const int ptNodePos = getTerminalPtNodePositionOfWord(wordCodePoints,
            false /* forceLowerCaseSearch */);
    if (ptNodePos == NOT_A_DICT_POS) {
        AKLOGE("getWordProperty is called for invalid word.");
Loading