Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 21ce9c4a authored by Keisuke Kuroyanagi's avatar Keisuke Kuroyanagi Committed by Android (Google) Code Review
Browse files

Merge "Stochastic decay."

parents 2dcb6337 c76bbcee
Loading
Loading
Loading
Loading
+6 −6
Original line number Diff line number Diff line
@@ -43,7 +43,7 @@ void DynamicBigramListPolicy::getNextBigram(int *const outBigramPos, int *const
    }
    *outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags);
    *outHasNext = BigramListReadWriteUtils::hasNext(bigramFlags);
    if (mIsDecayingDict && !ForgettingCurveUtils::isValidBigram(*outProbability)) {
    if (mIsDecayingDict && !ForgettingCurveUtils::isValidEncodedProbability(*outProbability)) {
        // This bigram is too weak to output.
        *outBigramPos = NOT_A_DICT_POS;
    } else {
@@ -261,8 +261,8 @@ bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramTarg
            const int originalProbability = BigramListReadWriteUtils::getProbabilityFromFlags(
                    bigramFlags);
            const int probabilityToWrite = mIsDecayingDict ?
                    ForgettingCurveUtils::getUpdatedBigramProbabilityDelta(
                            originalProbability, probability) : probability;
                    ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability,
                            probability) : probability;
            const BigramListReadWriteUtils::BigramFlags updatedFlags =
                    BigramListReadWriteUtils::setProbabilityInFlags(bigramFlags,
                            probabilityToWrite);
@@ -294,7 +294,7 @@ bool DynamicBigramListPolicy::writeNewBigramEntry(const int bigramTargetPos, con
        int *const writingPos) {
    // hasNext is false because we are adding a new bigram entry at the end of the bigram list.
    const int probabilityToWrite = mIsDecayingDict ?
            ForgettingCurveUtils::getUpdatedBigramProbabilityDelta(NOT_A_PROBABILITY, probability) :
            ForgettingCurveUtils::getUpdatedEncodedProbability(NOT_A_PROBABILITY, probability) :
                    probability;
    return BigramListReadWriteUtils::createAndWriteBigramEntry(mBuffer, bigramTargetPos,
            probabilityToWrite, false /* hasNext */, writingPos);
@@ -365,9 +365,9 @@ bool DynamicBigramListPolicy::updateProbabilityForDecay(
    *outRemoved = false;
    if (mIsDecayingDict) {
        // Update bigram probability for decaying.
        const int newProbability = ForgettingCurveUtils::getBigramProbabilityDeltaToSave(
        const int newProbability = ForgettingCurveUtils::getEncodedProbabilityToSave(
                BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags));
        if (ForgettingCurveUtils::isValidBigram(newProbability)) {
        if (ForgettingCurveUtils::isValidEncodedProbability(newProbability)) {
            // Write new probability.
            const BigramListReadWriteUtils::BigramFlags updatedBigramFlags =
                    BigramListReadWriteUtils::setProbabilityInFlags(
+2 −2
Original line number Diff line number Diff line
@@ -29,14 +29,14 @@ bool DynamicPatriciaTrieGcEventListeners
    bool isUselessPtNode = !node->isTerminal();
    if (node->isTerminal() && mIsDecayingDict) {
        const int newProbability =
                ForgettingCurveUtils::getUnigramProbabilityToSave(node->getProbability());
                ForgettingCurveUtils::getEncodedProbabilityToSave(node->getProbability());
        int writingPos = node->getProbabilityFieldPos();
        // Update probability.
        if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(
                mBuffer, newProbability, &writingPos)) {
            return false;
        }
        if (!ForgettingCurveUtils::isValidUnigram(newProbability)) {
        if (!ForgettingCurveUtils::isValidEncodedProbability(newProbability)) {
            isUselessPtNode = false;
        }
    }
+1 −1
Original line number Diff line number Diff line
@@ -545,7 +545,7 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
int DynamicPatriciaTrieWritingHelper::getUpdatedProbability(const int originalProbability,
        const int newProbability) {
    if (mNeedsToDecay) {
        return ForgettingCurveUtils::getUpdatedUnigramProbability(originalProbability,
        return ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability,
                newProbability);
    } else {
        return newProbability;
+49 −62
Original line number Diff line number Diff line
@@ -14,6 +14,8 @@
 * limitations under the License.
 */

#include <stdlib.h>

#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"

#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
@@ -26,106 +28,91 @@ const int ForgettingCurveUtils::MAX_BIGRAM_COUNT = 12000;
const int ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC = 10000;

const int ForgettingCurveUtils::MAX_COMPUTED_PROBABILITY = 127;
const int ForgettingCurveUtils::MAX_UNIGRAM_PROBABILITY = 120;
const int ForgettingCurveUtils::MIN_VALID_UNIGRAM_PROBABILITY = 24;
const int ForgettingCurveUtils::UNIGRAM_PROBABILITY_STEP = 8;
const int ForgettingCurveUtils::MAX_BIGRAM_PROBABILITY_DELTA = 15;
const int ForgettingCurveUtils::MIN_VALID_BIGRAM_PROBABILITY_DELTA = 3;
const int ForgettingCurveUtils::BIGRAM_PROBABILITY_DELTA_STEP = 1;
const int ForgettingCurveUtils::MAX_ENCODED_PROBABILITY = 15;
const int ForgettingCurveUtils::MIN_VALID_ENCODED_PROBABILITY = 3;
const int ForgettingCurveUtils::ENCODED_PROBABILITY_STEP = 1;
// Currently, we try to decay each uni/bigram once every 2 hours. Accordingly, the expected
// duration of the decay is approximately 66 hours.
const float ForgettingCurveUtils::MIN_PROBABILITY_TO_DECAY = 0.03f;

/* static */ int ForgettingCurveUtils::getProbability(const int encodedUnigramProbability,
        const int encodedBigramProbabilityDelta) {
        const int encodedBigramProbability) {
    if (encodedUnigramProbability == NOT_A_PROBABILITY) {
        return NOT_A_PROBABILITY;
    } else if (encodedBigramProbabilityDelta == NOT_A_PROBABILITY) {
        const int rawProbability = ProbabilityUtils::backoff(decodeUnigramProbability(
                encodedUnigramProbability));
        return min(getDecayedProbability(rawProbability), MAX_COMPUTED_PROBABILITY);
    } else if (encodedBigramProbability == NOT_A_PROBABILITY) {
        return backoff(decodeUnigramProbability(encodedUnigramProbability));
    } else {
        const int rawProbability = ProbabilityUtils::computeProbabilityForBigram(
                decodeUnigramProbability(encodedUnigramProbability),
                decodeBigramProbabilityDelta(encodedBigramProbabilityDelta));
        return min(getDecayedProbability(rawProbability), MAX_COMPUTED_PROBABILITY);
        const int unigramProbability = decodeUnigramProbability(encodedUnigramProbability);
        const int bigramProbability = decodeBigramProbability(encodedBigramProbability);
        return min(max(unigramProbability, bigramProbability), MAX_COMPUTED_PROBABILITY);
    }
}

/* static */ int ForgettingCurveUtils::getUpdatedUnigramProbability(
// Caveat: Unlike getProbability(), this method doesn't assume special bigram probability encoding
// (i.e. unigram probability + bigram probability delta).
/* static */ int ForgettingCurveUtils::getUpdatedEncodedProbability(
        const int originalEncodedProbability, const int newProbability) {
    if (originalEncodedProbability == NOT_A_PROBABILITY) {
        // The unigram is not in this dictionary.
        if (newProbability == NOT_A_PROBABILITY) {
            // The unigram is not in other dictionaries.
            return 0;
        } else {
            return MIN_VALID_UNIGRAM_PROBABILITY;
        }
    } else {
        if (newProbability != NOT_A_PROBABILITY
                && originalEncodedProbability < MIN_VALID_UNIGRAM_PROBABILITY) {
            return MIN_VALID_UNIGRAM_PROBABILITY;
        }
        return min(originalEncodedProbability + UNIGRAM_PROBABILITY_STEP, MAX_UNIGRAM_PROBABILITY);
    }
}

/* static */ int ForgettingCurveUtils::getUnigramProbabilityToSave(const int encodedProbability) {
    return max(encodedProbability - UNIGRAM_PROBABILITY_STEP, 0);
}

/* static */ int ForgettingCurveUtils::getBigramProbabilityDeltaToSave(
        const int encodedProbabilityDelta) {
    return max(encodedProbabilityDelta - BIGRAM_PROBABILITY_DELTA_STEP, 0);
}

/* static */ int ForgettingCurveUtils::getUpdatedBigramProbabilityDelta(
        const int originalEncodedProbabilityDelta, const int newProbability) {
    if (originalEncodedProbabilityDelta == NOT_A_PROBABILITY) {
        // The bigram relation is not in this dictionary.
        if (newProbability == NOT_A_PROBABILITY) {
            // The bigram target is not in other dictionaries.
            return 0;
        } else {
            return MIN_VALID_BIGRAM_PROBABILITY_DELTA;
            return MIN_VALID_ENCODED_PROBABILITY;
        }
    } else {
        if (newProbability != NOT_A_PROBABILITY
                && originalEncodedProbabilityDelta < MIN_VALID_BIGRAM_PROBABILITY_DELTA) {
            return MIN_VALID_BIGRAM_PROBABILITY_DELTA;
                && originalEncodedProbability < MIN_VALID_ENCODED_PROBABILITY) {
            return MIN_VALID_ENCODED_PROBABILITY;
        }
        return min(originalEncodedProbabilityDelta + BIGRAM_PROBABILITY_DELTA_STEP,
                MAX_BIGRAM_PROBABILITY_DELTA);
        return min(originalEncodedProbability + ENCODED_PROBABILITY_STEP, MAX_ENCODED_PROBABILITY);
    }
}

/* static */ int ForgettingCurveUtils::isValidUnigram(const int encodedUnigramProbability) {
    return encodedUnigramProbability >= MIN_VALID_UNIGRAM_PROBABILITY;
// Reports whether the encoded probability reaches MIN_VALID_ENCODED_PROBABILITY. The result is
// returned as an int: 1 when it does, 0 otherwise.
/* static */ int ForgettingCurveUtils::isValidEncodedProbability(const int encodedProbability) {
    const bool reachesMinimum = MIN_VALID_ENCODED_PROBABILITY <= encodedProbability;
    return reachesMinimum;
}

/* static */ int ForgettingCurveUtils::isValidBigram(const int encodedBigramProbabilityDelta) {
    return encodedBigramProbabilityDelta >= MIN_VALID_BIGRAM_PROBABILITY_DELTA;
// Returns the encoded probability to persist after one stochastic decay attempt. The input is
// clamped to [0, MAX_ENCODED_PROBABILITY] and then either kept as is or lowered by
// ENCODED_PROBABILITY_STEP (never below 0), depending on a single rand() draw.
// NOTE(review): as written, the step down happens when the draw exceeds the threshold, and the
// threshold is smallest (MIN_PROBABILITY_TO_DECAY) for the largest encoded value, so larger
// values are lowered more often. Confirm this is the intended direction.
/* static */ int ForgettingCurveUtils::getEncodedProbabilityToSave(const int encodedProbability) {
    const int clampedProbability = max(min(encodedProbability, MAX_ENCODED_PROBABILITY), 0);
    // TODO: Implement the decay in a more proper way.
    // Position of the clamped value within the encodable range, from 0.0f to 1.0f.
    const float rate = static_cast<float>(clampedProbability)
            / static_cast<float>(MAX_ENCODED_PROBABILITY);
    const float keepThreshold = MIN_PROBABILITY_TO_DECAY
            + (1.0f - MIN_PROBABILITY_TO_DECAY) * (1.0f - rate);
    // Exactly one rand() call per invocation, scaled by RAND_MAX.
    const float draw = static_cast<float>(rand()) / static_cast<float>(RAND_MAX);
    const bool shouldDecay = keepThreshold < draw;
    if (!shouldDecay) {
        return clampedProbability;
    }
    return max(clampedProbability - ENCODED_PROBABILITY_STEP, 0);
}

/* static */ int ForgettingCurveUtils::decodeUnigramProbability(const int encodedProbability) {
    const int probability = encodedProbability - MIN_VALID_UNIGRAM_PROBABILITY;
    const int probability = encodedProbability - MIN_VALID_ENCODED_PROBABILITY;
    if (probability < 0) {
        return NOT_A_PROBABILITY;
    } else {
        return min(probability, MAX_UNIGRAM_PROBABILITY);
        return min(probability, MAX_ENCODED_PROBABILITY) * 8;
    }
}

/* static */ int ForgettingCurveUtils::decodeBigramProbabilityDelta(
        const int encodedProbabilityDelta) {
    const int probabilityDelta = encodedProbabilityDelta - MIN_VALID_BIGRAM_PROBABILITY_DELTA;
    if (probabilityDelta < 0) {
/* static */ int ForgettingCurveUtils::decodeBigramProbability(const int encodedProbability) {
    const int probability = encodedProbability - MIN_VALID_ENCODED_PROBABILITY;
    if (probability < 0) {
        return NOT_A_PROBABILITY;
    } else {
        return min(probabilityDelta, MAX_BIGRAM_PROBABILITY_DELTA);
        return min(probability, MAX_ENCODED_PROBABILITY) * 8;
    }
}

/* static */ int ForgettingCurveUtils::getDecayedProbability(const int rawProbability) {
    return rawProbability;
// Subtracts 8 from the given unigram probability, never going below 0. NOT_A_PROBABILITY is
// passed through unchanged. See comments in ProbabilityUtils::backoff().
/* static */ int ForgettingCurveUtils::backoff(const int unigramProbability) {
    if (unigramProbability != NOT_A_PROBABILITY) {
        const int penalizedProbability = unigramProbability - 8;
        return max(penalizedProbability, 0);
    }
    return NOT_A_PROBABILITY;
}

} // namespace latinime
+11 −20
Original line number Diff line number Diff line
@@ -24,7 +24,6 @@ namespace latinime {
// TODO: Check the elapsed time and decrease the probability depending on the time. A time field
// needs to be introduced to each terminal PtNode and bigram entry.
// TODO: Quit using bigram probability to indicate the delta.
// TODO: Quit using bigram probability delta.
class ForgettingCurveUtils {
 public:
    static const int MAX_UNIGRAM_COUNT;
@@ -33,38 +32,30 @@ class ForgettingCurveUtils {
    static const int MAX_BIGRAM_COUNT_AFTER_GC;

    static int getProbability(const int encodedUnigramProbability,
            const int encodedBigramProbabilityDelta);
            const int encodedBigramProbability);

    static int getUpdatedUnigramProbability(const int originalEncodedProbability,
    static int getUpdatedEncodedProbability(const int originalEncodedProbability,
            const int newProbability);

    static int getUpdatedBigramProbabilityDelta(const int originalEncodedProbabilityDelta,
            const int newProbability);

    static int isValidUnigram(const int encodedUnigramProbability);

    static int isValidBigram(const int encodedProbabilityDelta);
    static int isValidEncodedProbability(const int encodedProbability);

    static int getUnigramProbabilityToSave(const int encodedProbability);

    static int getBigramProbabilityDeltaToSave(const int encodedProbabilityDelta);
    static int getEncodedProbabilityToSave(const int encodedProbability);

 private:
    DISALLOW_IMPLICIT_CONSTRUCTORS(ForgettingCurveUtils);

    static const int MAX_COMPUTED_PROBABILITY;
    static const int MAX_UNIGRAM_PROBABILITY;
    static const int MIN_VALID_UNIGRAM_PROBABILITY;
    static const int UNIGRAM_PROBABILITY_STEP;
    static const int MAX_BIGRAM_PROBABILITY_DELTA;
    static const int MIN_VALID_BIGRAM_PROBABILITY_DELTA;
    static const int BIGRAM_PROBABILITY_DELTA_STEP;
    static const int MAX_ENCODED_PROBABILITY;
    static const int MIN_VALID_ENCODED_PROBABILITY;
    static const int ENCODED_PROBABILITY_STEP;

    static const float MIN_PROBABILITY_TO_DECAY;

    static int decodeUnigramProbability(const int encodedProbability);

    static int decodeBigramProbabilityDelta(const int encodedProbability);
    static int decodeBigramProbability(const int encodedProbability);

    static int getDecayedProbability(const int rawProbability);
    static int backoff(const int unigramProbability);
};
} // namespace latinime
#endif /* LATINIME_FORGETTING_CURVE_UTILS_H */
Loading