Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit c76bbcee authored by Keisuke Kuroyanagi's avatar Keisuke Kuroyanagi
Browse files

Stochastic decay.

Bug: 6669677
Change-Id: Ib2d9228b951c77dab7a8675ce9db60677e87e771
parent 13d5dc91
Loading
Loading
Loading
Loading
+6 −6
Original line number Diff line number Diff line
@@ -43,7 +43,7 @@ void DynamicBigramListPolicy::getNextBigram(int *const outBigramPos, int *const
    }
    *outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags);
    *outHasNext = BigramListReadWriteUtils::hasNext(bigramFlags);
    if (mIsDecayingDict && !ForgettingCurveUtils::isValidBigram(*outProbability)) {
    if (mIsDecayingDict && !ForgettingCurveUtils::isValidEncodedProbability(*outProbability)) {
        // This bigram is too weak to output.
        *outBigramPos = NOT_A_DICT_POS;
    } else {
@@ -261,8 +261,8 @@ bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramTarg
            const int originalProbability = BigramListReadWriteUtils::getProbabilityFromFlags(
                    bigramFlags);
            const int probabilityToWrite = mIsDecayingDict ?
                    ForgettingCurveUtils::getUpdatedBigramProbabilityDelta(
                            originalProbability, probability) : probability;
                    ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability,
                            probability) : probability;
            const BigramListReadWriteUtils::BigramFlags updatedFlags =
                    BigramListReadWriteUtils::setProbabilityInFlags(bigramFlags,
                            probabilityToWrite);
@@ -294,7 +294,7 @@ bool DynamicBigramListPolicy::writeNewBigramEntry(const int bigramTargetPos, con
        int *const writingPos) {
    // hasNext is false because we are adding a new bigram entry at the end of the bigram list.
    const int probabilityToWrite = mIsDecayingDict ?
            ForgettingCurveUtils::getUpdatedBigramProbabilityDelta(NOT_A_PROBABILITY, probability) :
            ForgettingCurveUtils::getUpdatedEncodedProbability(NOT_A_PROBABILITY, probability) :
                    probability;
    return BigramListReadWriteUtils::createAndWriteBigramEntry(mBuffer, bigramTargetPos,
            probabilityToWrite, false /* hasNext */, writingPos);
@@ -365,9 +365,9 @@ bool DynamicBigramListPolicy::updateProbabilityForDecay(
    *outRemoved = false;
    if (mIsDecayingDict) {
        // Update bigram probability for decaying.
        const int newProbability = ForgettingCurveUtils::getBigramProbabilityDeltaToSave(
        const int newProbability = ForgettingCurveUtils::getEncodedProbabilityToSave(
                BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags));
        if (ForgettingCurveUtils::isValidBigram(newProbability)) {
        if (ForgettingCurveUtils::isValidEncodedProbability(newProbability)) {
            // Write new probability.
            const BigramListReadWriteUtils::BigramFlags updatedBigramFlags =
                    BigramListReadWriteUtils::setProbabilityInFlags(
+2 −2
Original line number Diff line number Diff line
@@ -29,14 +29,14 @@ bool DynamicPatriciaTrieGcEventListeners
    bool isUselessPtNode = !node->isTerminal();
    if (node->isTerminal() && mIsDecayingDict) {
        const int newProbability =
                ForgettingCurveUtils::getUnigramProbabilityToSave(node->getProbability());
                ForgettingCurveUtils::getEncodedProbabilityToSave(node->getProbability());
        int writingPos = node->getProbabilityFieldPos();
        // Update probability.
        if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(
                mBuffer, newProbability, &writingPos)) {
            return false;
        }
        if (!ForgettingCurveUtils::isValidUnigram(newProbability)) {
        if (!ForgettingCurveUtils::isValidEncodedProbability(newProbability)) {
            isUselessPtNode = false;
        }
    }
+1 −1
Original line number Diff line number Diff line
@@ -545,7 +545,7 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
int DynamicPatriciaTrieWritingHelper::getUpdatedProbability(const int originalProbability,
        const int newProbability) {
    if (mNeedsToDecay) {
        return ForgettingCurveUtils::getUpdatedUnigramProbability(originalProbability,
        return ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability,
                newProbability);
    } else {
        return newProbability;
+49 −62
Original line number Diff line number Diff line
@@ -14,6 +14,8 @@
 * limitations under the License.
 */

#include <stdlib.h>

#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"

#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
@@ -26,106 +28,91 @@ const int ForgettingCurveUtils::MAX_BIGRAM_COUNT = 12000;
const int ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC = 10000;

const int ForgettingCurveUtils::MAX_COMPUTED_PROBABILITY = 127;
const int ForgettingCurveUtils::MAX_UNIGRAM_PROBABILITY = 120;
const int ForgettingCurveUtils::MIN_VALID_UNIGRAM_PROBABILITY = 24;
const int ForgettingCurveUtils::UNIGRAM_PROBABILITY_STEP = 8;
const int ForgettingCurveUtils::MAX_BIGRAM_PROBABILITY_DELTA = 15;
const int ForgettingCurveUtils::MIN_VALID_BIGRAM_PROBABILITY_DELTA = 3;
const int ForgettingCurveUtils::BIGRAM_PROBABILITY_DELTA_STEP = 1;
const int ForgettingCurveUtils::MAX_ENCODED_PROBABILITY = 15;
const int ForgettingCurveUtils::MIN_VALID_ENCODED_PROBABILITY = 3;
const int ForgettingCurveUtils::ENCODED_PROBABILITY_STEP = 1;
// Currently, we try to decay each uni/bigram once every 2 hours. Accordingly, the expected
// duration of the decay is approximately 66 hours.
const float ForgettingCurveUtils::MIN_PROBABILITY_TO_DECAY = 0.03f;

/* static */ int ForgettingCurveUtils::getProbability(const int encodedUnigramProbability,
        const int encodedBigramProbabilityDelta) {
        const int encodedBigramProbability) {
    if (encodedUnigramProbability == NOT_A_PROBABILITY) {
        return NOT_A_PROBABILITY;
    } else if (encodedBigramProbabilityDelta == NOT_A_PROBABILITY) {
        const int rawProbability = ProbabilityUtils::backoff(decodeUnigramProbability(
                encodedUnigramProbability));
        return min(getDecayedProbability(rawProbability), MAX_COMPUTED_PROBABILITY);
    } else if (encodedBigramProbability == NOT_A_PROBABILITY) {
        return backoff(decodeUnigramProbability(encodedUnigramProbability));
    } else {
        const int rawProbability = ProbabilityUtils::computeProbabilityForBigram(
                decodeUnigramProbability(encodedUnigramProbability),
                decodeBigramProbabilityDelta(encodedBigramProbabilityDelta));
        return min(getDecayedProbability(rawProbability), MAX_COMPUTED_PROBABILITY);
        const int unigramProbability = decodeUnigramProbability(encodedUnigramProbability);
        const int bigramProbability = decodeBigramProbability(encodedBigramProbability);
        return min(max(unigramProbability, bigramProbability), MAX_COMPUTED_PROBABILITY);
    }
}

/* static */ int ForgettingCurveUtils::getUpdatedUnigramProbability(
// Caveat: Unlike getProbability(), this method doesn't assume special bigram probability encoding
// (i.e. unigram probability + bigram probability delta).
/* static */ int ForgettingCurveUtils::getUpdatedEncodedProbability(
        const int originalEncodedProbability, const int newProbability) {
    if (originalEncodedProbability == NOT_A_PROBABILITY) {
        // The unigram is not in this dictionary.
        if (newProbability == NOT_A_PROBABILITY) {
            // The unigram is not in other dictionaries.
            return 0;
        } else {
            return MIN_VALID_UNIGRAM_PROBABILITY;
        }
    } else {
        if (newProbability != NOT_A_PROBABILITY
                && originalEncodedProbability < MIN_VALID_UNIGRAM_PROBABILITY) {
            return MIN_VALID_UNIGRAM_PROBABILITY;
        }
        return min(originalEncodedProbability + UNIGRAM_PROBABILITY_STEP, MAX_UNIGRAM_PROBABILITY);
    }
}

/* static */ int ForgettingCurveUtils::getUnigramProbabilityToSave(const int encodedProbability) {
    return max(encodedProbability - UNIGRAM_PROBABILITY_STEP, 0);
}

/* static */ int ForgettingCurveUtils::getBigramProbabilityDeltaToSave(
        const int encodedProbabilityDelta) {
    return max(encodedProbabilityDelta - BIGRAM_PROBABILITY_DELTA_STEP, 0);
}

/* static */ int ForgettingCurveUtils::getUpdatedBigramProbabilityDelta(
        const int originalEncodedProbabilityDelta, const int newProbability) {
    if (originalEncodedProbabilityDelta == NOT_A_PROBABILITY) {
        // The bigram relation is not in this dictionary.
        if (newProbability == NOT_A_PROBABILITY) {
            // The bigram target is not in other dictionaries.
            return 0;
        } else {
            return MIN_VALID_BIGRAM_PROBABILITY_DELTA;
            return MIN_VALID_ENCODED_PROBABILITY;
        }
    } else {
        if (newProbability != NOT_A_PROBABILITY
                && originalEncodedProbabilityDelta < MIN_VALID_BIGRAM_PROBABILITY_DELTA) {
            return MIN_VALID_BIGRAM_PROBABILITY_DELTA;
                && originalEncodedProbability < MIN_VALID_ENCODED_PROBABILITY) {
            return MIN_VALID_ENCODED_PROBABILITY;
        }
        return min(originalEncodedProbabilityDelta + BIGRAM_PROBABILITY_DELTA_STEP,
                MAX_BIGRAM_PROBABILITY_DELTA);
        return min(originalEncodedProbability + ENCODED_PROBABILITY_STEP, MAX_ENCODED_PROBABILITY);
    }
}

/* static */ int ForgettingCurveUtils::isValidUnigram(const int encodedUnigramProbability) {
    return encodedUnigramProbability >= MIN_VALID_UNIGRAM_PROBABILITY;
// Checks an encoded probability against the minimum valid value.
// Returns the result of the comparison as an int: non-zero when encodedProbability is at least
// MIN_VALID_ENCODED_PROBABILITY, 0 otherwise.
/* static */ int ForgettingCurveUtils::isValidEncodedProbability(const int encodedProbability) {
    return encodedProbability >= MIN_VALID_ENCODED_PROBABILITY;
}

/* static */ int ForgettingCurveUtils::isValidBigram(const int encodedBigramProbabilityDelta) {
    return encodedBigramProbabilityDelta >= MIN_VALID_BIGRAM_PROBABILITY_DELTA;
// Randomly decays an encoded probability by at most one step.
// The input is first clamped to [0, MAX_ENCODED_PROBABILITY]. The result is either that clamped
// value or the clamped value minus ENCODED_PROBABILITY_STEP, never below 0.
/* static */ int ForgettingCurveUtils::getEncodedProbabilityToSave(const int encodedProbability) {
    const int currentEncodedProbability = max(min(encodedProbability, MAX_ENCODED_PROBABILITY), 0);
    // TODO: Implement the decay in more proper way.
    // Fraction of the maximum encoded probability currently held, in [0, 1].
    const float currentRate = static_cast<float>(currentEncodedProbability)
            / static_cast<float>(MAX_ENCODED_PROBABILITY);
    // The threshold goes from MIN_PROBABILITY_TO_DECAY (when currentRate is 1) up to 1.0 (when
    // currentRate is 0), so smaller encoded probabilities are less likely to be decremented by
    // the comparison below.
    const float thresholdToDecay = MIN_PROBABILITY_TO_DECAY
            + (1.0f - MIN_PROBABILITY_TO_DECAY) * (1.0f - currentRate);
    // rand() yields a value in [0, RAND_MAX], so randValue lies in [0, 1].
    const float randValue = static_cast<float>(rand()) / static_cast<float>(RAND_MAX);
    if (thresholdToDecay < randValue) {
        // Decay by one step, keeping the result non-negative.
        return max(currentEncodedProbability - ENCODED_PROBABILITY_STEP, 0);
    } else {
        // No decay this time; the clamped value is kept.
        return currentEncodedProbability;
    }
}

/* static */ int ForgettingCurveUtils::decodeUnigramProbability(const int encodedProbability) {
    const int probability = encodedProbability - MIN_VALID_UNIGRAM_PROBABILITY;
    const int probability = encodedProbability - MIN_VALID_ENCODED_PROBABILITY;
    if (probability < 0) {
        return NOT_A_PROBABILITY;
    } else {
        return min(probability, MAX_UNIGRAM_PROBABILITY);
        return min(probability, MAX_ENCODED_PROBABILITY) * 8;
    }
}

/* static */ int ForgettingCurveUtils::decodeBigramProbabilityDelta(
        const int encodedProbabilityDelta) {
    const int probabilityDelta = encodedProbabilityDelta - MIN_VALID_BIGRAM_PROBABILITY_DELTA;
    if (probabilityDelta < 0) {
/* static */ int ForgettingCurveUtils::decodeBigramProbability(const int encodedProbability) {
    const int probability = encodedProbability - MIN_VALID_ENCODED_PROBABILITY;
    if (probability < 0) {
        return NOT_A_PROBABILITY;
    } else {
        return min(probabilityDelta, MAX_BIGRAM_PROBABILITY_DELTA);
        return min(probability, MAX_ENCODED_PROBABILITY) * 8;
    }
}

/* static */ int ForgettingCurveUtils::getDecayedProbability(const int rawProbability) {
    return rawProbability;
// See comments in ProbabilityUtils::backoff().
// Lowers a unigram probability by a fixed amount of 8, with 0 as the lower bound.
// NOT_A_PROBABILITY is passed through unchanged.
/* static */ int ForgettingCurveUtils::backoff(const int unigramProbability) {
    if (unigramProbability == NOT_A_PROBABILITY) {
        return NOT_A_PROBABILITY;
    } else {
        // Clamp so the backed-off probability never becomes negative.
        return max(unigramProbability - 8, 0);
    }
}

} // namespace latinime
+11 −20
Original line number Diff line number Diff line
@@ -24,7 +24,6 @@ namespace latinime {
// TODO: Check the elapsed time and decrease the probability depending on the time. A time field
// needs to be introduced to each terminal PtNode and bigram entry.
// TODO: Quit using bigram probability to indicate the delta.
// TODO: Quit using bigram probability delta.
class ForgettingCurveUtils {
 public:
    static const int MAX_UNIGRAM_COUNT;
@@ -33,38 +32,30 @@ class ForgettingCurveUtils {
    static const int MAX_BIGRAM_COUNT_AFTER_GC;

    static int getProbability(const int encodedUnigramProbability,
            const int encodedBigramProbabilityDelta);
            const int encodedBigramProbability);

    static int getUpdatedUnigramProbability(const int originalEncodedProbability,
    static int getUpdatedEncodedProbability(const int originalEncodedProbability,
            const int newProbability);

    static int getUpdatedBigramProbabilityDelta(const int originalEncodedProbabilityDelta,
            const int newProbability);

    static int isValidUnigram(const int encodedUnigramProbability);

    static int isValidBigram(const int encodedProbabilityDelta);
    static int isValidEncodedProbability(const int encodedProbability);

    static int getUnigramProbabilityToSave(const int encodedProbability);

    static int getBigramProbabilityDeltaToSave(const int encodedProbabilityDelta);
    static int getEncodedProbabilityToSave(const int encodedProbability);

 private:
    DISALLOW_IMPLICIT_CONSTRUCTORS(ForgettingCurveUtils);

    static const int MAX_COMPUTED_PROBABILITY;
    static const int MAX_UNIGRAM_PROBABILITY;
    static const int MIN_VALID_UNIGRAM_PROBABILITY;
    static const int UNIGRAM_PROBABILITY_STEP;
    static const int MAX_BIGRAM_PROBABILITY_DELTA;
    static const int MIN_VALID_BIGRAM_PROBABILITY_DELTA;
    static const int BIGRAM_PROBABILITY_DELTA_STEP;
    static const int MAX_ENCODED_PROBABILITY;
    static const int MIN_VALID_ENCODED_PROBABILITY;
    static const int ENCODED_PROBABILITY_STEP;

    static const float MIN_PROBABILITY_TO_DECAY;

    static int decodeUnigramProbability(const int encodedProbability);

    static int decodeBigramProbabilityDelta(const int encodedProbability);
    static int decodeBigramProbability(const int encodedProbability);

    static int getDecayedProbability(const int rawProbability);
    static int backoff(const int unigramProbability);
};
} // namespace latinime
#endif /* LATINIME_FORGETTING_CURVE_UTILS_H */
Loading