Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 7a661698, authored by Keisuke Kuroyanagi, committed by Android Git Automerger
Browse files

am 12d3bd22: am 65330d28: Merge "Implement simple dictionary decay."

* commit '12d3bd22':
  Implement simple dictionary decay.
parents c8a82edf 12d3bd22
Loading
Loading
Loading
Loading
+9 −2
Original line number Diff line number Diff line
@@ -22,6 +22,7 @@ import android.util.Log;

import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.Constants;
import com.android.inputmethod.latin.Dictionary;
import com.android.inputmethod.latin.ExpandableBinaryDictionary;
import com.android.inputmethod.latin.LatinImeLogger;
import com.android.inputmethod.latin.makedict.DictDecoder;
@@ -50,6 +51,9 @@ public abstract class DecayingExpandableBinaryDictionaryBase extends ExpandableB
    /** Any pair being typed or picked */
    public static final int FREQUENCY_FOR_TYPED = 2;

    /**
     * Frequency used when the {@code isValid} flag is true and
     * ENABLE_BINARY_DICTIONARY_DYNAMIC_UPDATE is on. Same value as FREQUENCY_FOR_TYPED.
     */
    public static final int FREQUENCY_FOR_WORDS_IN_DICTS = FREQUENCY_FOR_TYPED;
    /**
     * Frequency used when the {@code isValid} flag is false and
     * ENABLE_BINARY_DICTIONARY_DYNAMIC_UPDATE is on. Set to Dictionary.NOT_A_PROBABILITY.
     */
    public static final int FREQUENCY_FOR_WORDS_NOT_IN_DICTS = Dictionary.NOT_A_PROBABILITY;

    /** Locale for which this user history dictionary is storing words */
    private final String mLocale;

@@ -131,14 +135,17 @@ public abstract class DecayingExpandableBinaryDictionaryBase extends ExpandableB
                (word0 != null && word0.length() >= Constants.DICTIONARY_MAX_WORD_LENGTH)) {
            return;
        }
        addWordDynamically(word1, null /* the "shortcut" parameter is null */, FREQUENCY_FOR_TYPED,
        final int frequency = ENABLE_BINARY_DICTIONARY_DYNAMIC_UPDATE ?
                (isValid ? FREQUENCY_FOR_WORDS_IN_DICTS : FREQUENCY_FOR_WORDS_NOT_IN_DICTS) :
                        FREQUENCY_FOR_TYPED;
        addWordDynamically(word1, null /* the "shortcut" parameter is null */, frequency,
                false /* isNotAWord */);
        // Do not insert a word as a bigram of itself
        if (word1.equals(word0)) {
            return;
        }
        if (null != word0) {
            addBigramDynamically(word0, word1, FREQUENCY_FOR_TYPED, isValid);
            addBigramDynamically(word0, word1, frequency, isValid);
        }
    }

+1 −0
Original line number Diff line number Diff line
@@ -85,6 +85,7 @@ LATIN_IME_CORE_SRC_FILES := \
    $(addprefix suggest/policyimpl/dictionary/utils/, \
        buffer_with_extendable_buffer.cpp \
        byte_array_utils.cpp \
        decaying_utils.cpp \
        dict_file_writing_utils.cpp \
        format_utils.cpp) \
    suggest/policyimpl/gesture/gesture_suggest_policy_factory.cpp \
+56 −7
Original line number Diff line number Diff line
@@ -17,10 +17,10 @@
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"

#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "suggest/policyimpl/dictionary/utils/decaying_utils.h"

namespace latinime {

@@ -41,9 +41,14 @@ void DynamicBigramListPolicy::getNextBigram(int *const outBigramPos, int *const
    if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) {
        originalBigramPos += mBuffer->getOriginalBufferSize();
    }
    *outBigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
    *outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags);
    *outHasNext = BigramListReadWriteUtils::hasNext(bigramFlags);
    if (mIsDecayingDict && !DecayingUtils::isValidBigram(*outProbability)) {
        // This bigram is too weak to output.
        *outBigramPos = NOT_A_DICT_POS;
    } else {
        *outBigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
    }
    if (usesAdditionalBuffer) {
        *bigramEntryPos += mBuffer->getOriginalBufferSize();
    }
@@ -153,15 +158,21 @@ bool DynamicBigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(
        const int bigramTargetNodePos =
                followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
        nodeReader.fetchNodeInfoInBufferFromPtNodePos(bigramTargetNodePos);
        // TODO: Update probability for supporting probability decaying.
        if (nodeReader.isDeleted() || !nodeReader.isTerminal()
                || bigramTargetNodePos == NOT_A_DICT_POS) {
            // The target is no longer valid terminal. Invalidate the current bigram entry.
            if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags,
                    NOT_A_DICT_POS /* targetOffset */, &bigramEntryPos)) {
                    NOT_A_DICT_POS /* targetPtNodePos */, &bigramEntryPos)) {
                return false;
            }
        } else {
            continue;
        }
        bool isRemoved = false;
        if (!updateProbabilityForDecay(bigramFlags, bigramTargetNodePos, &bigramEntryPos,
                &isRemoved)) {
            return false;
        }
        if (!isRemoved) {
            (*outValidBigramEntryCount) += 1;
        }
    } while(BigramListReadWriteUtils::hasNext(bigramFlags));
@@ -247,8 +258,14 @@ bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramTarg
        if (followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos) == bigramTargetPos) {
            // Update this bigram entry.
            *outAddedNewBigram = false;
            const int originalProbability = BigramListReadWriteUtils::getProbabilityFromFlags(
                    bigramFlags);
            const int probabilityToWrite = mIsDecayingDict ?
                    DecayingUtils::getUpdatedBigramProbabilityDelta(
                            originalProbability, probability) : probability;
            const BigramListReadWriteUtils::BigramFlags updatedFlags =
                    BigramListReadWriteUtils::setProbabilityInFlags(bigramFlags, probability);
                    BigramListReadWriteUtils::setProbabilityInFlags(bigramFlags,
                            probabilityToWrite);
            return BigramListReadWriteUtils::writeBigramEntry(mBuffer, updatedFlags,
                    originalBigramPos, &entryPos);
        }
@@ -276,8 +293,11 @@ bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramTarg
// Creates and writes a new bigram entry that targets bigramTargetPos, forwarding writingPos to
// BigramListReadWriteUtils::createAndWriteBigramEntry() and returning its result.
// When mIsDecayingDict is set, the probability to write is obtained from
// DecayingUtils::getUpdatedBigramProbabilityDelta() with NOT_A_PROBABILITY as the original
// probability (there is no existing entry to update); otherwise the given probability is used
// as is.
bool DynamicBigramListPolicy::writeNewBigramEntry(const int bigramTargetPos, const int probability,
        int *const writingPos) {
    // hasNext is false because we are adding a new bigram entry at the end of the bigram list.
    const int probabilityToWrite = mIsDecayingDict ?
            DecayingUtils::getUpdatedBigramProbabilityDelta(NOT_A_PROBABILITY, probability) :
                    probability;
    return BigramListReadWriteUtils::createAndWriteBigramEntry(mBuffer, bigramTargetPos,
            probability, false /* hasNext */, writingPos);
            probabilityToWrite, false /* hasNext */, writingPos);
}

bool DynamicBigramListPolicy::removeBigram(const int bigramListPos, const int bigramTargetPos) {
@@ -339,4 +359,33 @@ int DynamicBigramListPolicy::followBigramLinkAndGetCurrentBigramPtNodePos(
    return currentPos;
}

// Applies decay to a single bigram entry and rewrites it in mBuffer.
//   bigramFlags     flags of the entry being processed; its probability is read from here.
//   targetPtNodePos target position written back when the entry is kept.
//   bigramEntryPos  position handed to BigramListReadWriteUtils::writeBigramEntry().
//   outRemoved      set to true when the entry is invalidated, false otherwise.
// Returns false only when writing the entry fails. For a dictionary that is not decaying,
// nothing is written and true is returned.
bool DynamicBigramListPolicy::updateProbabilityForDecay(
        BigramListReadWriteUtils::BigramFlags bigramFlags, const int targetPtNodePos,
        int *const bigramEntryPos, bool *const outRemoved) const {
    *outRemoved = false;
    if (!mIsDecayingDict) {
        // Decay is disabled for this dictionary; leave the entry as it is.
        return true;
    }
    const int currentProbability = BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags);
    const int decayedProbability =
            DecayingUtils::getBigramProbabilityDeltaToSave(currentProbability);
    if (!DecayingUtils::isValidBigram(decayedProbability)) {
        // The decayed entry is no longer valid: invalidate it by clearing its target.
        *outRemoved = true;
        return BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags,
                NOT_A_DICT_POS /* targetPtNodePos */, bigramEntryPos);
    }
    // Still valid: store the decayed probability while keeping the same target.
    const BigramListReadWriteUtils::BigramFlags decayedFlags =
            BigramListReadWriteUtils::setProbabilityInFlags(bigramFlags, decayedProbability);
    return BigramListReadWriteUtils::writeBigramEntry(mBuffer, decayedFlags, targetPtNodePos,
            bigramEntryPos);
}

} // namespace latinime
+8 −2
Original line number Diff line number Diff line
@@ -21,6 +21,7 @@

#include "defines.h"
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"

namespace latinime {
@@ -34,8 +35,9 @@ class DictionaryShortcutsStructurePolicy;
class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
 public:
    DynamicBigramListPolicy(BufferWithExtendableBuffer *const buffer,
            const DictionaryShortcutsStructurePolicy *const shortcutPolicy)
            : mBuffer(buffer), mShortcutPolicy(shortcutPolicy) {}
            const DictionaryShortcutsStructurePolicy *const shortcutPolicy,
            const bool isDecayingDict)
            : mBuffer(buffer), mShortcutPolicy(shortcutPolicy), mIsDecayingDict(isDecayingDict) {}

    ~DynamicBigramListPolicy() {}

@@ -74,9 +76,13 @@ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {

    BufferWithExtendableBuffer *const mBuffer;
    const DictionaryShortcutsStructurePolicy *const mShortcutPolicy;
    const bool mIsDecayingDict;

    // Follow bigram link and return the position of bigram target PtNode that is currently valid.
    int followBigramLinkAndGetCurrentBigramPtNodePos(const int originalBigramPos) const;

    bool updateProbabilityForDecay(BigramListReadWriteUtils::BigramFlags bigramFlags,
            const int targetPtNodePos, int *const bigramEntryPos, bool *const outRemoved) const;
};
} // namespace latinime
#endif // LATINIME_DYNAMIC_BIGRAM_LIST_POLICY_H
+15 −0
Original line number Diff line number Diff line
@@ -16,6 +16,8 @@

#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h"

#include "suggest/policyimpl/dictionary/utils/decaying_utils.h"

namespace latinime {

bool DynamicPatriciaTrieGcEventListeners
@@ -25,6 +27,19 @@ bool DynamicPatriciaTrieGcEventListeners
    // PtNode is useless when the PtNode is not a terminal and doesn't have any not useless
    // children.
    bool isUselessPtNode = !node->isTerminal();
    if (node->isTerminal() && mIsDecayingDict) {
        const int newProbability =
                DecayingUtils::getUnigramProbabilityToSave(node->getProbability());
        int writingPos = node->getProbabilityFieldPos();
        // Update probability.
        if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(
                mBuffer, newProbability, &writingPos)) {
            return false;
        }
        if (!DecayingUtils::isValidUnigram(newProbability)) {
            isUselessPtNode = false;
        }
    }
    if (mChildrenValue > 0) {
        isUselessPtNode = false;
    } else if (node->isTerminal()) {
Loading