Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 65330d28 authored by Keisuke Kuroyanagi's avatar Keisuke Kuroyanagi Committed by Android (Google) Code Review
Browse files

Merge "Implement simple dictionary decay."

parents ef084229 fd02b2d6
Loading
Loading
Loading
Loading
+9 −2
Original line number Diff line number Diff line
@@ -22,6 +22,7 @@ import android.util.Log;

import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.Constants;
import com.android.inputmethod.latin.Dictionary;
import com.android.inputmethod.latin.ExpandableBinaryDictionary;
import com.android.inputmethod.latin.LatinImeLogger;
import com.android.inputmethod.latin.makedict.DictDecoder;
@@ -50,6 +51,9 @@ public abstract class DecayingExpandableBinaryDictionaryBase extends ExpandableB
    /** Any pair being typed or picked */
    public static final int FREQUENCY_FOR_TYPED = 2;

    public static final int FREQUENCY_FOR_WORDS_IN_DICTS = FREQUENCY_FOR_TYPED;
    public static final int FREQUENCY_FOR_WORDS_NOT_IN_DICTS = Dictionary.NOT_A_PROBABILITY;

    /** Locale for which this user history dictionary is storing words */
    private final String mLocale;

@@ -131,14 +135,17 @@ public abstract class DecayingExpandableBinaryDictionaryBase extends ExpandableB
                (word0 != null && word0.length() >= Constants.DICTIONARY_MAX_WORD_LENGTH)) {
            return;
        }
        addWordDynamically(word1, null /* the "shortcut" parameter is null */, FREQUENCY_FOR_TYPED,
        final int frequency = ENABLE_BINARY_DICTIONARY_DYNAMIC_UPDATE ?
                (isValid ? FREQUENCY_FOR_WORDS_IN_DICTS : FREQUENCY_FOR_WORDS_NOT_IN_DICTS) :
                        FREQUENCY_FOR_TYPED;
        addWordDynamically(word1, null /* the "shortcut" parameter is null */, frequency,
                false /* isNotAWord */);
        // Do not insert a word as a bigram of itself
        if (word1.equals(word0)) {
            return;
        }
        if (null != word0) {
            addBigramDynamically(word0, word1, FREQUENCY_FOR_TYPED, isValid);
            addBigramDynamically(word0, word1, frequency, isValid);
        }
    }

+1 −0
Original line number Diff line number Diff line
@@ -85,6 +85,7 @@ LATIN_IME_CORE_SRC_FILES := \
    $(addprefix suggest/policyimpl/dictionary/utils/, \
        buffer_with_extendable_buffer.cpp \
        byte_array_utils.cpp \
        decaying_utils.cpp \
        dict_file_writing_utils.cpp \
        format_utils.cpp) \
    suggest/policyimpl/gesture/gesture_suggest_policy_factory.cpp \
+56 −7
Original line number Diff line number Diff line
@@ -17,10 +17,10 @@
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"

#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "suggest/policyimpl/dictionary/utils/decaying_utils.h"

namespace latinime {

@@ -41,9 +41,14 @@ void DynamicBigramListPolicy::getNextBigram(int *const outBigramPos, int *const
    if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) {
        originalBigramPos += mBuffer->getOriginalBufferSize();
    }
    *outBigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
    *outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags);
    *outHasNext = BigramListReadWriteUtils::hasNext(bigramFlags);
    if (mIsDecayingDict && !DecayingUtils::isValidBigram(*outProbability)) {
        // This bigram is too weak to output.
        *outBigramPos = NOT_A_DICT_POS;
    } else {
        *outBigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
    }
    if (usesAdditionalBuffer) {
        *bigramEntryPos += mBuffer->getOriginalBufferSize();
    }
@@ -153,15 +158,21 @@ bool DynamicBigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(
        const int bigramTargetNodePos =
                followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
        nodeReader.fetchNodeInfoInBufferFromPtNodePos(bigramTargetNodePos);
        // TODO: Update probability for supporting probability decaying.
        if (nodeReader.isDeleted() || !nodeReader.isTerminal()
                || bigramTargetNodePos == NOT_A_DICT_POS) {
            // The target is no longer a valid terminal. Invalidate the current bigram entry.
            if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags,
                    NOT_A_DICT_POS /* targetOffset */, &bigramEntryPos)) {
                    NOT_A_DICT_POS /* targetPtNodePos */, &bigramEntryPos)) {
                return false;
            }
        } else {
            continue;
        }
        bool isRemoved = false;
        if (!updateProbabilityForDecay(bigramFlags, bigramTargetNodePos, &bigramEntryPos,
                &isRemoved)) {
            return false;
        }
        if (!isRemoved) {
            (*outValidBigramEntryCount) += 1;
        }
    } while(BigramListReadWriteUtils::hasNext(bigramFlags));
@@ -247,8 +258,14 @@ bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramTarg
        if (followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos) == bigramTargetPos) {
            // Update this bigram entry.
            *outAddedNewBigram = false;
            const int originalProbability = BigramListReadWriteUtils::getProbabilityFromFlags(
                    bigramFlags);
            const int probabilityToWrite = mIsDecayingDict ?
                    DecayingUtils::getUpdatedBigramProbabilityDelta(
                            originalProbability, probability) : probability;
            const BigramListReadWriteUtils::BigramFlags updatedFlags =
                    BigramListReadWriteUtils::setProbabilityInFlags(bigramFlags, probability);
                    BigramListReadWriteUtils::setProbabilityInFlags(bigramFlags,
                            probabilityToWrite);
            return BigramListReadWriteUtils::writeBigramEntry(mBuffer, updatedFlags,
                    originalBigramPos, &entryPos);
        }
@@ -276,8 +293,11 @@ bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramTarg
bool DynamicBigramListPolicy::writeNewBigramEntry(const int bigramTargetPos, const int probability,
        int *const writingPos) {
    // hasNext is false because we are adding a new bigram entry at the end of the bigram list.
    const int probabilityToWrite = mIsDecayingDict ?
            DecayingUtils::getUpdatedBigramProbabilityDelta(NOT_A_PROBABILITY, probability) :
                    probability;
    return BigramListReadWriteUtils::createAndWriteBigramEntry(mBuffer, bigramTargetPos,
            probability, false /* hasNext */, writingPos);
            probabilityToWrite, false /* hasNext */, writingPos);
}

bool DynamicBigramListPolicy::removeBigram(const int bigramListPos, const int bigramTargetPos) {
@@ -339,4 +359,33 @@ int DynamicBigramListPolicy::followBigramLinkAndGetCurrentBigramPtNodePos(
    return currentPos;
}

// Applies probability decay to one bigram entry and rewrites that entry in mBuffer.
//
// bigramFlags:      flags of the entry being processed; its probability is read from here.
// targetPtNodePos:  target position written back when the entry is kept.
// bigramEntryPos:   in/out position handed to writeBigramEntry for the rewrite.
// outRemoved:       reset to false on entry; set to true only when the entry is invalidated.
//
// Does nothing but reset *outRemoved when this policy is not for a decaying dictionary.
// Returns false only when rewriting the entry fails.
bool DynamicBigramListPolicy::updateProbabilityForDecay(
        BigramListReadWriteUtils::BigramFlags bigramFlags, const int targetPtNodePos,
        int *const bigramEntryPos, bool *const outRemoved) const {
    *outRemoved = false;
    if (!mIsDecayingDict) {
        // Decay does not apply; leave the entry exactly as it is.
        return true;
    }
    const int storedProbability = BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags);
    const int decayedProbability =
            DecayingUtils::getBigramProbabilityDeltaToSave(storedProbability);
    if (!DecayingUtils::isValidBigram(decayedProbability)) {
        // The decayed value is no longer valid: drop the entry by clearing its target while
        // keeping the original flags.
        *outRemoved = true;
        return BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags,
                NOT_A_DICT_POS /* targetPtNodePos */, bigramEntryPos);
    }
    // Still valid: store the decayed probability in the flags and keep the same target.
    const BigramListReadWriteUtils::BigramFlags decayedFlags =
            BigramListReadWriteUtils::setProbabilityInFlags(bigramFlags, decayedProbability);
    return BigramListReadWriteUtils::writeBigramEntry(mBuffer, decayedFlags, targetPtNodePos,
            bigramEntryPos);
}

} // namespace latinime
+8 −2
Original line number Diff line number Diff line
@@ -21,6 +21,7 @@

#include "defines.h"
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"

namespace latinime {
@@ -34,8 +35,9 @@ class DictionaryShortcutsStructurePolicy;
class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
 public:
    DynamicBigramListPolicy(BufferWithExtendableBuffer *const buffer,
            const DictionaryShortcutsStructurePolicy *const shortcutPolicy)
            : mBuffer(buffer), mShortcutPolicy(shortcutPolicy) {}
            const DictionaryShortcutsStructurePolicy *const shortcutPolicy,
            const bool isDecayingDict)
            : mBuffer(buffer), mShortcutPolicy(shortcutPolicy), mIsDecayingDict(isDecayingDict) {}

    ~DynamicBigramListPolicy() {}

@@ -74,9 +76,13 @@ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {

    BufferWithExtendableBuffer *const mBuffer;
    const DictionaryShortcutsStructurePolicy *const mShortcutPolicy;
    const bool mIsDecayingDict;

    // Follow bigram link and return the position of bigram target PtNode that is currently valid.
    int followBigramLinkAndGetCurrentBigramPtNodePos(const int originalBigramPos) const;

    bool updateProbabilityForDecay(BigramListReadWriteUtils::BigramFlags bigramFlags,
            const int targetPtNodePos, int *const bigramEntryPos, bool *const outRemoved) const;
};
} // namespace latinime
#endif // LATINIME_DYNAMIC_BIGRAM_LIST_POLICY_H
+15 −0
Original line number Diff line number Diff line
@@ -16,6 +16,8 @@

#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h"

#include "suggest/policyimpl/dictionary/utils/decaying_utils.h"

namespace latinime {

bool DynamicPatriciaTrieGcEventListeners
@@ -25,6 +27,19 @@ bool DynamicPatriciaTrieGcEventListeners
    // A PtNode is useless when it is not a terminal and none of its children are still
    // useful.
    bool isUselessPtNode = !node->isTerminal();
    // For decaying dictionaries, recompute the unigram probability of each visited terminal and
    // write the new value back into the PtNode's probability field.
    if (node->isTerminal() && mIsDecayingDict) {
        const int newProbability =
                DecayingUtils::getUnigramProbabilityToSave(node->getProbability());
        int writingPos = node->getProbabilityFieldPos();
        // Update probability.
        if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(
                mBuffer, newProbability, &writingPos)) {
            return false;
        }
        if (!DecayingUtils::isValidUnigram(newProbability)) {
            // The updated probability is no longer a valid unigram, so this terminal becomes a
            // candidate for removal. isUselessPtNode is already false for every terminal, so
            // assigning false here was a no-op and a decayed word could never be marked useless.
            isUselessPtNode = true;
        }
    }
    if (mChildrenValue > 0) {
        isUselessPtNode = false;
    } else if (node->isTerminal()) {
Loading