Loading native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp +6 −6 Original line number Diff line number Diff line Loading @@ -43,7 +43,7 @@ void DynamicBigramListPolicy::getNextBigram(int *const outBigramPos, int *const } *outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags); *outHasNext = BigramListReadWriteUtils::hasNext(bigramFlags); if (mIsDecayingDict && !ForgettingCurveUtils::isValidBigram(*outProbability)) { if (mIsDecayingDict && !ForgettingCurveUtils::isValidEncodedProbability(*outProbability)) { // This bigram is too weak to output. *outBigramPos = NOT_A_DICT_POS; } else { Loading Loading @@ -261,8 +261,8 @@ bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramTarg const int originalProbability = BigramListReadWriteUtils::getProbabilityFromFlags( bigramFlags); const int probabilityToWrite = mIsDecayingDict ? ForgettingCurveUtils::getUpdatedBigramProbabilityDelta( originalProbability, probability) : probability; ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability, probability) : probability; const BigramListReadWriteUtils::BigramFlags updatedFlags = BigramListReadWriteUtils::setProbabilityInFlags(bigramFlags, probabilityToWrite); Loading Loading @@ -294,7 +294,7 @@ bool DynamicBigramListPolicy::writeNewBigramEntry(const int bigramTargetPos, con int *const writingPos) { // hasNext is false because we are adding a new bigram entry at the end of the bigram list. const int probabilityToWrite = mIsDecayingDict ? ForgettingCurveUtils::getUpdatedBigramProbabilityDelta(NOT_A_PROBABILITY, probability) : ForgettingCurveUtils::getUpdatedEncodedProbability(NOT_A_PROBABILITY, probability) : probability; return BigramListReadWriteUtils::createAndWriteBigramEntry(mBuffer, bigramTargetPos, probabilityToWrite, false /* hasNext */, writingPos); Loading Loading @@ -365,9 +365,9 @@ bool DynamicBigramListPolicy::updateProbabilityForDecay( *outRemoved = false; if (mIsDecayingDict) { // Update bigram probability for decaying. const int newProbability = ForgettingCurveUtils::getBigramProbabilityDeltaToSave( const int newProbability = ForgettingCurveUtils::getEncodedProbabilityToSave( BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags)); if (ForgettingCurveUtils::isValidBigram(newProbability)) { if (ForgettingCurveUtils::isValidEncodedProbability(newProbability)) { // Write new probability. const BigramListReadWriteUtils::BigramFlags updatedBigramFlags = BigramListReadWriteUtils::setProbabilityInFlags( Loading native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp +2 −2 Original line number Diff line number Diff line Loading @@ -29,14 +29,14 @@ bool DynamicPatriciaTrieGcEventListeners bool isUselessPtNode = !node->isTerminal(); if (node->isTerminal() && mIsDecayingDict) { const int newProbability = ForgettingCurveUtils::getUnigramProbabilityToSave(node->getProbability()); ForgettingCurveUtils::getEncodedProbabilityToSave(node->getProbability()); int writingPos = node->getProbabilityFieldPos(); // Update probability. if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition( mBuffer, newProbability, &writingPos)) { return false; } if (!ForgettingCurveUtils::isValidUnigram(newProbability)) { if (!ForgettingCurveUtils::isValidEncodedProbability(newProbability)) { isUselessPtNode = false; } } Loading native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp +1 −1 Original line number Diff line number Diff line Loading @@ -545,7 +545,7 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, int DynamicPatriciaTrieWritingHelper::getUpdatedProbability(const int originalProbability, const int newProbability) { if (mNeedsToDecay) { return ForgettingCurveUtils::getUpdatedUnigramProbability(originalProbability, return ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability, newProbability); } else { return newProbability; Loading native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp +49 −62 Original line number Diff line number Diff line Loading @@ -14,6 +14,8 @@ * limitations under the License. */ #include <stdlib.h> #include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" #include "suggest/policyimpl/dictionary/utils/probability_utils.h" Loading @@ -26,106 +28,91 @@ const int ForgettingCurveUtils::MAX_BIGRAM_COUNT = 12000; const int ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC = 10000; const int ForgettingCurveUtils::MAX_COMPUTED_PROBABILITY = 127; const int ForgettingCurveUtils::MAX_UNIGRAM_PROBABILITY = 120; const int ForgettingCurveUtils::MIN_VALID_UNIGRAM_PROBABILITY = 24; const int ForgettingCurveUtils::UNIGRAM_PROBABILITY_STEP = 8; const int ForgettingCurveUtils::MAX_BIGRAM_PROBABILITY_DELTA = 15; const int ForgettingCurveUtils::MIN_VALID_BIGRAM_PROBABILITY_DELTA = 3; const int ForgettingCurveUtils::BIGRAM_PROBABILITY_DELTA_STEP = 1; const int ForgettingCurveUtils::MAX_ENCODED_PROBABILITY = 15; const int ForgettingCurveUtils::MIN_VALID_ENCODED_PROBABILITY = 3; const int ForgettingCurveUtils::ENCODED_PROBABILITY_STEP = 1; // Currently, we try to decay each uni/bigram once every 2 hours. Accordingly, the expected // duration of the decay is approximately 66hours. const float ForgettingCurveUtils::MIN_PROBABILITY_TO_DECAY = 0.03f; /* static */ int ForgettingCurveUtils::getProbability(const int encodedUnigramProbability, const int encodedBigramProbabilityDelta) { const int encodedBigramProbability) { if (encodedUnigramProbability == NOT_A_PROBABILITY) { return NOT_A_PROBABILITY; } else if (encodedBigramProbabilityDelta == NOT_A_PROBABILITY) { const int rawProbability = ProbabilityUtils::backoff(decodeUnigramProbability( encodedUnigramProbability)); return min(getDecayedProbability(rawProbability), MAX_COMPUTED_PROBABILITY); } else if (encodedBigramProbability == NOT_A_PROBABILITY) { return backoff(decodeUnigramProbability(encodedUnigramProbability)); } else { const int rawProbability = ProbabilityUtils::computeProbabilityForBigram( decodeUnigramProbability(encodedUnigramProbability), decodeBigramProbabilityDelta(encodedBigramProbabilityDelta)); return min(getDecayedProbability(rawProbability), MAX_COMPUTED_PROBABILITY); const int unigramProbability = decodeUnigramProbability(encodedUnigramProbability); const int bigramProbability = decodeBigramProbability(encodedBigramProbability); return min(max(unigramProbability, bigramProbability), MAX_COMPUTED_PROBABILITY); } } /* static */ int ForgettingCurveUtils::getUpdatedUnigramProbability( // Caveat: Unlike getProbability(), this method doesn't assume special bigram probability encoding // (i.e. unigram probability + bigram probability delta). /* static */ int ForgettingCurveUtils::getUpdatedEncodedProbability( const int originalEncodedProbability, const int newProbability) { if (originalEncodedProbability == NOT_A_PROBABILITY) { // The unigram is not in this dictionary. if (newProbability == NOT_A_PROBABILITY) { // The unigram is not in other dictionaries. return 0; } else { return MIN_VALID_UNIGRAM_PROBABILITY; } } else { if (newProbability != NOT_A_PROBABILITY && originalEncodedProbability < MIN_VALID_UNIGRAM_PROBABILITY) { return MIN_VALID_UNIGRAM_PROBABILITY; } return min(originalEncodedProbability + UNIGRAM_PROBABILITY_STEP, MAX_UNIGRAM_PROBABILITY); } } /* static */ int ForgettingCurveUtils::getUnigramProbabilityToSave(const int encodedProbability) { return max(encodedProbability - UNIGRAM_PROBABILITY_STEP, 0); } /* static */ int ForgettingCurveUtils::getBigramProbabilityDeltaToSave( const int encodedProbabilityDelta) { return max(encodedProbabilityDelta - BIGRAM_PROBABILITY_DELTA_STEP, 0); } /* static */ int ForgettingCurveUtils::getUpdatedBigramProbabilityDelta( const int originalEncodedProbabilityDelta, const int newProbability) { if (originalEncodedProbabilityDelta == NOT_A_PROBABILITY) { // The bigram relation is not in this dictionary. if (newProbability == NOT_A_PROBABILITY) { // The bigram target is not in other dictionaries. return 0; } else { return MIN_VALID_BIGRAM_PROBABILITY_DELTA; return MIN_VALID_ENCODED_PROBABILITY; } } else { if (newProbability != NOT_A_PROBABILITY && originalEncodedProbabilityDelta < MIN_VALID_BIGRAM_PROBABILITY_DELTA) { return MIN_VALID_BIGRAM_PROBABILITY_DELTA; && originalEncodedProbability < MIN_VALID_ENCODED_PROBABILITY) { return MIN_VALID_ENCODED_PROBABILITY; } return min(originalEncodedProbabilityDelta + BIGRAM_PROBABILITY_DELTA_STEP, MAX_BIGRAM_PROBABILITY_DELTA); return min(originalEncodedProbability + ENCODED_PROBABILITY_STEP, MAX_ENCODED_PROBABILITY); } } /* static */ int ForgettingCurveUtils::isValidUnigram(const int encodedUnigramProbability) { return encodedUnigramProbability >= MIN_VALID_UNIGRAM_PROBABILITY; /* static */ int ForgettingCurveUtils::isValidEncodedProbability(const int encodedProbability) { return encodedProbability >= MIN_VALID_ENCODED_PROBABILITY; } /* static */ int ForgettingCurveUtils::isValidBigram(const int encodedBigramProbabilityDelta) { return encodedBigramProbabilityDelta >= MIN_VALID_BIGRAM_PROBABILITY_DELTA; /* static */ int ForgettingCurveUtils::getEncodedProbabilityToSave(const int encodedProbability) { const int currentEncodedProbability = max(min(encodedProbability, MAX_ENCODED_PROBABILITY), 0); // TODO: Implement the decay in more proper way. const float currentRate = static_cast<float>(currentEncodedProbability) / static_cast<float>(MAX_ENCODED_PROBABILITY); const float thresholdToDecay = MIN_PROBABILITY_TO_DECAY + (1.0f - MIN_PROBABILITY_TO_DECAY) * (1.0f - currentRate); const float randValue = static_cast<float>(rand()) / static_cast<float>(RAND_MAX); if (thresholdToDecay < randValue) { return max(currentEncodedProbability - ENCODED_PROBABILITY_STEP, 0); } else { return currentEncodedProbability; } } /* static */ int ForgettingCurveUtils::decodeUnigramProbability(const int encodedProbability) { const int probability = encodedProbability - MIN_VALID_UNIGRAM_PROBABILITY; const int probability = encodedProbability - MIN_VALID_ENCODED_PROBABILITY; if (probability < 0) { return NOT_A_PROBABILITY; } else { return min(probability, MAX_UNIGRAM_PROBABILITY); return min(probability, MAX_ENCODED_PROBABILITY) * 8; } } /* static */ int ForgettingCurveUtils::decodeBigramProbabilityDelta( const int encodedProbabilityDelta) { const int probabilityDelta = encodedProbabilityDelta - MIN_VALID_BIGRAM_PROBABILITY_DELTA; if (probabilityDelta < 0) { /* static */ int ForgettingCurveUtils::decodeBigramProbability(const int encodedProbability) { const int probability = encodedProbability - MIN_VALID_ENCODED_PROBABILITY; if (probability < 0) { return NOT_A_PROBABILITY; } else { return min(probabilityDelta, MAX_BIGRAM_PROBABILITY_DELTA); return min(probability, MAX_ENCODED_PROBABILITY) * 8; } } /* static */ int ForgettingCurveUtils::getDecayedProbability(const int rawProbability) { return rawProbability; // See comments in ProbabilityUtils::backoff(). /* static */ int ForgettingCurveUtils::backoff(const int unigramProbability) { if (unigramProbability == NOT_A_PROBABILITY) { return NOT_A_PROBABILITY; } else { return max(unigramProbability - 8, 0); } } } // namespace latinime native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h +11 −20 Original line number Diff line number Diff line Loading @@ -24,7 +24,6 @@ namespace latinime { // TODO: Check the elapsed time and decrease the probability depending on the time. Time field is // required to introduced to each terminal PtNode and bigram entry. // TODO: Quit using bigram probability to indicate the delta. // TODO: Quit using bigram probability delta. class ForgettingCurveUtils { public: static const int MAX_UNIGRAM_COUNT; Loading @@ -33,38 +32,30 @@ class ForgettingCurveUtils { static const int MAX_BIGRAM_COUNT_AFTER_GC; static int getProbability(const int encodedUnigramProbability, const int encodedBigramProbabilityDelta); const int encodedBigramProbability); static int getUpdatedUnigramProbability(const int originalEncodedProbability, static int getUpdatedEncodedProbability(const int originalEncodedProbability, const int newProbability); static int getUpdatedBigramProbabilityDelta(const int originalEncodedProbabilityDelta, const int newProbability); static int isValidUnigram(const int encodedUnigramProbability); static int isValidBigram(const int encodedProbabilityDelta); static int isValidEncodedProbability(const int encodedProbability); static int getUnigramProbabilityToSave(const int encodedProbability); static int getBigramProbabilityDeltaToSave(const int encodedProbabilityDelta); static int getEncodedProbabilityToSave(const int encodedProbability); private: DISALLOW_IMPLICIT_CONSTRUCTORS(ForgettingCurveUtils); static const int MAX_COMPUTED_PROBABILITY; static const int MAX_UNIGRAM_PROBABILITY; static const int MIN_VALID_UNIGRAM_PROBABILITY; static const int UNIGRAM_PROBABILITY_STEP; static const int MAX_BIGRAM_PROBABILITY_DELTA; static const int MIN_VALID_BIGRAM_PROBABILITY_DELTA; static const int BIGRAM_PROBABILITY_DELTA_STEP; static const int MAX_ENCODED_PROBABILITY; static const int MIN_VALID_ENCODED_PROBABILITY; static const int ENCODED_PROBABILITY_STEP; static const float MIN_PROBABILITY_TO_DECAY; static int decodeUnigramProbability(const int encodedProbability); static int decodeBigramProbabilityDelta(const int encodedProbability); static int decodeBigramProbability(const int encodedProbability); static int getDecayedProbability(const int rawProbability); static int backoff(const int unigramProbability); }; } // namespace latinime #endif /* LATINIME_FORGETTING_CURVE_UTILS_H */ Loading
native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp +6 −6 Original line number Diff line number Diff line Loading @@ -43,7 +43,7 @@ void DynamicBigramListPolicy::getNextBigram(int *const outBigramPos, int *const } *outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags); *outHasNext = BigramListReadWriteUtils::hasNext(bigramFlags); if (mIsDecayingDict && !ForgettingCurveUtils::isValidBigram(*outProbability)) { if (mIsDecayingDict && !ForgettingCurveUtils::isValidEncodedProbability(*outProbability)) { // This bigram is too weak to output. *outBigramPos = NOT_A_DICT_POS; } else { Loading Loading @@ -261,8 +261,8 @@ bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramTarg const int originalProbability = BigramListReadWriteUtils::getProbabilityFromFlags( bigramFlags); const int probabilityToWrite = mIsDecayingDict ? ForgettingCurveUtils::getUpdatedBigramProbabilityDelta( originalProbability, probability) : probability; ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability, probability) : probability; const BigramListReadWriteUtils::BigramFlags updatedFlags = BigramListReadWriteUtils::setProbabilityInFlags(bigramFlags, probabilityToWrite); Loading Loading @@ -294,7 +294,7 @@ bool DynamicBigramListPolicy::writeNewBigramEntry(const int bigramTargetPos, con int *const writingPos) { // hasNext is false because we are adding a new bigram entry at the end of the bigram list. const int probabilityToWrite = mIsDecayingDict ? ForgettingCurveUtils::getUpdatedBigramProbabilityDelta(NOT_A_PROBABILITY, probability) : ForgettingCurveUtils::getUpdatedEncodedProbability(NOT_A_PROBABILITY, probability) : probability; return BigramListReadWriteUtils::createAndWriteBigramEntry(mBuffer, bigramTargetPos, probabilityToWrite, false /* hasNext */, writingPos); Loading Loading @@ -365,9 +365,9 @@ bool DynamicBigramListPolicy::updateProbabilityForDecay( *outRemoved = false; if (mIsDecayingDict) { // Update bigram probability for decaying. const int newProbability = ForgettingCurveUtils::getBigramProbabilityDeltaToSave( const int newProbability = ForgettingCurveUtils::getEncodedProbabilityToSave( BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags)); if (ForgettingCurveUtils::isValidBigram(newProbability)) { if (ForgettingCurveUtils::isValidEncodedProbability(newProbability)) { // Write new probability. const BigramListReadWriteUtils::BigramFlags updatedBigramFlags = BigramListReadWriteUtils::setProbabilityInFlags( Loading
native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp +2 −2 Original line number Diff line number Diff line Loading @@ -29,14 +29,14 @@ bool DynamicPatriciaTrieGcEventListeners bool isUselessPtNode = !node->isTerminal(); if (node->isTerminal() && mIsDecayingDict) { const int newProbability = ForgettingCurveUtils::getUnigramProbabilityToSave(node->getProbability()); ForgettingCurveUtils::getEncodedProbabilityToSave(node->getProbability()); int writingPos = node->getProbabilityFieldPos(); // Update probability. if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition( mBuffer, newProbability, &writingPos)) { return false; } if (!ForgettingCurveUtils::isValidUnigram(newProbability)) { if (!ForgettingCurveUtils::isValidEncodedProbability(newProbability)) { isUselessPtNode = false; } } Loading
native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp +1 −1 Original line number Diff line number Diff line Loading @@ -545,7 +545,7 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, int DynamicPatriciaTrieWritingHelper::getUpdatedProbability(const int originalProbability, const int newProbability) { if (mNeedsToDecay) { return ForgettingCurveUtils::getUpdatedUnigramProbability(originalProbability, return ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability, newProbability); } else { return newProbability; Loading
native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.cpp +49 −62 Original line number Diff line number Diff line Loading @@ -14,6 +14,8 @@ * limitations under the License. */ #include <stdlib.h> #include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" #include "suggest/policyimpl/dictionary/utils/probability_utils.h" Loading @@ -26,106 +28,91 @@ const int ForgettingCurveUtils::MAX_BIGRAM_COUNT = 12000; const int ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC = 10000; const int ForgettingCurveUtils::MAX_COMPUTED_PROBABILITY = 127; const int ForgettingCurveUtils::MAX_UNIGRAM_PROBABILITY = 120; const int ForgettingCurveUtils::MIN_VALID_UNIGRAM_PROBABILITY = 24; const int ForgettingCurveUtils::UNIGRAM_PROBABILITY_STEP = 8; const int ForgettingCurveUtils::MAX_BIGRAM_PROBABILITY_DELTA = 15; const int ForgettingCurveUtils::MIN_VALID_BIGRAM_PROBABILITY_DELTA = 3; const int ForgettingCurveUtils::BIGRAM_PROBABILITY_DELTA_STEP = 1; const int ForgettingCurveUtils::MAX_ENCODED_PROBABILITY = 15; const int ForgettingCurveUtils::MIN_VALID_ENCODED_PROBABILITY = 3; const int ForgettingCurveUtils::ENCODED_PROBABILITY_STEP = 1; // Currently, we try to decay each uni/bigram once every 2 hours. Accordingly, the expected // duration of the decay is approximately 66hours. const float ForgettingCurveUtils::MIN_PROBABILITY_TO_DECAY = 0.03f; /* static */ int ForgettingCurveUtils::getProbability(const int encodedUnigramProbability, const int encodedBigramProbabilityDelta) { const int encodedBigramProbability) { if (encodedUnigramProbability == NOT_A_PROBABILITY) { return NOT_A_PROBABILITY; } else if (encodedBigramProbabilityDelta == NOT_A_PROBABILITY) { const int rawProbability = ProbabilityUtils::backoff(decodeUnigramProbability( encodedUnigramProbability)); return min(getDecayedProbability(rawProbability), MAX_COMPUTED_PROBABILITY); } else if (encodedBigramProbability == NOT_A_PROBABILITY) { return backoff(decodeUnigramProbability(encodedUnigramProbability)); } else { const int rawProbability = ProbabilityUtils::computeProbabilityForBigram( decodeUnigramProbability(encodedUnigramProbability), decodeBigramProbabilityDelta(encodedBigramProbabilityDelta)); return min(getDecayedProbability(rawProbability), MAX_COMPUTED_PROBABILITY); const int unigramProbability = decodeUnigramProbability(encodedUnigramProbability); const int bigramProbability = decodeBigramProbability(encodedBigramProbability); return min(max(unigramProbability, bigramProbability), MAX_COMPUTED_PROBABILITY); } } /* static */ int ForgettingCurveUtils::getUpdatedUnigramProbability( // Caveat: Unlike getProbability(), this method doesn't assume special bigram probability encoding // (i.e. unigram probability + bigram probability delta). /* static */ int ForgettingCurveUtils::getUpdatedEncodedProbability( const int originalEncodedProbability, const int newProbability) { if (originalEncodedProbability == NOT_A_PROBABILITY) { // The unigram is not in this dictionary. if (newProbability == NOT_A_PROBABILITY) { // The unigram is not in other dictionaries. return 0; } else { return MIN_VALID_UNIGRAM_PROBABILITY; } } else { if (newProbability != NOT_A_PROBABILITY && originalEncodedProbability < MIN_VALID_UNIGRAM_PROBABILITY) { return MIN_VALID_UNIGRAM_PROBABILITY; } return min(originalEncodedProbability + UNIGRAM_PROBABILITY_STEP, MAX_UNIGRAM_PROBABILITY); } } /* static */ int ForgettingCurveUtils::getUnigramProbabilityToSave(const int encodedProbability) { return max(encodedProbability - UNIGRAM_PROBABILITY_STEP, 0); } /* static */ int ForgettingCurveUtils::getBigramProbabilityDeltaToSave( const int encodedProbabilityDelta) { return max(encodedProbabilityDelta - BIGRAM_PROBABILITY_DELTA_STEP, 0); } /* static */ int ForgettingCurveUtils::getUpdatedBigramProbabilityDelta( const int originalEncodedProbabilityDelta, const int newProbability) { if (originalEncodedProbabilityDelta == NOT_A_PROBABILITY) { // The bigram relation is not in this dictionary. if (newProbability == NOT_A_PROBABILITY) { // The bigram target is not in other dictionaries. return 0; } else { return MIN_VALID_BIGRAM_PROBABILITY_DELTA; return MIN_VALID_ENCODED_PROBABILITY; } } else { if (newProbability != NOT_A_PROBABILITY && originalEncodedProbabilityDelta < MIN_VALID_BIGRAM_PROBABILITY_DELTA) { return MIN_VALID_BIGRAM_PROBABILITY_DELTA; && originalEncodedProbability < MIN_VALID_ENCODED_PROBABILITY) { return MIN_VALID_ENCODED_PROBABILITY; } return min(originalEncodedProbabilityDelta + BIGRAM_PROBABILITY_DELTA_STEP, MAX_BIGRAM_PROBABILITY_DELTA); return min(originalEncodedProbability + ENCODED_PROBABILITY_STEP, MAX_ENCODED_PROBABILITY); } } /* static */ int ForgettingCurveUtils::isValidUnigram(const int encodedUnigramProbability) { return encodedUnigramProbability >= MIN_VALID_UNIGRAM_PROBABILITY; /* static */ int ForgettingCurveUtils::isValidEncodedProbability(const int encodedProbability) { return encodedProbability >= MIN_VALID_ENCODED_PROBABILITY; } /* static */ int ForgettingCurveUtils::isValidBigram(const int encodedBigramProbabilityDelta) { return encodedBigramProbabilityDelta >= MIN_VALID_BIGRAM_PROBABILITY_DELTA; /* static */ int ForgettingCurveUtils::getEncodedProbabilityToSave(const int encodedProbability) { const int currentEncodedProbability = max(min(encodedProbability, MAX_ENCODED_PROBABILITY), 0); // TODO: Implement the decay in more proper way. const float currentRate = static_cast<float>(currentEncodedProbability) / static_cast<float>(MAX_ENCODED_PROBABILITY); const float thresholdToDecay = MIN_PROBABILITY_TO_DECAY + (1.0f - MIN_PROBABILITY_TO_DECAY) * (1.0f - currentRate); const float randValue = static_cast<float>(rand()) / static_cast<float>(RAND_MAX); if (thresholdToDecay < randValue) { return max(currentEncodedProbability - ENCODED_PROBABILITY_STEP, 0); } else { return currentEncodedProbability; } } /* static */ int ForgettingCurveUtils::decodeUnigramProbability(const int encodedProbability) { const int probability = encodedProbability - MIN_VALID_UNIGRAM_PROBABILITY; const int probability = encodedProbability - MIN_VALID_ENCODED_PROBABILITY; if (probability < 0) { return NOT_A_PROBABILITY; } else { return min(probability, MAX_UNIGRAM_PROBABILITY); return min(probability, MAX_ENCODED_PROBABILITY) * 8; } } /* static */ int ForgettingCurveUtils::decodeBigramProbabilityDelta( const int encodedProbabilityDelta) { const int probabilityDelta = encodedProbabilityDelta - MIN_VALID_BIGRAM_PROBABILITY_DELTA; if (probabilityDelta < 0) { /* static */ int ForgettingCurveUtils::decodeBigramProbability(const int encodedProbability) { const int probability = encodedProbability - MIN_VALID_ENCODED_PROBABILITY; if (probability < 0) { return NOT_A_PROBABILITY; } else { return min(probabilityDelta, MAX_BIGRAM_PROBABILITY_DELTA); return min(probability, MAX_ENCODED_PROBABILITY) * 8; } } /* static */ int ForgettingCurveUtils::getDecayedProbability(const int rawProbability) { return rawProbability; // See comments in ProbabilityUtils::backoff(). /* static */ int ForgettingCurveUtils::backoff(const int unigramProbability) { if (unigramProbability == NOT_A_PROBABILITY) { return NOT_A_PROBABILITY; } else { return max(unigramProbability - 8, 0); } } } // namespace latinime
native/jni/src/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h +11 −20 Original line number Diff line number Diff line Loading @@ -24,7 +24,6 @@ namespace latinime { // TODO: Check the elapsed time and decrease the probability depending on the time. Time field is // required to introduced to each terminal PtNode and bigram entry. // TODO: Quit using bigram probability to indicate the delta. // TODO: Quit using bigram probability delta. class ForgettingCurveUtils { public: static const int MAX_UNIGRAM_COUNT; Loading @@ -33,38 +32,30 @@ class ForgettingCurveUtils { static const int MAX_BIGRAM_COUNT_AFTER_GC; static int getProbability(const int encodedUnigramProbability, const int encodedBigramProbabilityDelta); const int encodedBigramProbability); static int getUpdatedUnigramProbability(const int originalEncodedProbability, static int getUpdatedEncodedProbability(const int originalEncodedProbability, const int newProbability); static int getUpdatedBigramProbabilityDelta(const int originalEncodedProbabilityDelta, const int newProbability); static int isValidUnigram(const int encodedUnigramProbability); static int isValidBigram(const int encodedProbabilityDelta); static int isValidEncodedProbability(const int encodedProbability); static int getUnigramProbabilityToSave(const int encodedProbability); static int getBigramProbabilityDeltaToSave(const int encodedProbabilityDelta); static int getEncodedProbabilityToSave(const int encodedProbability); private: DISALLOW_IMPLICIT_CONSTRUCTORS(ForgettingCurveUtils); static const int MAX_COMPUTED_PROBABILITY; static const int MAX_UNIGRAM_PROBABILITY; static const int MIN_VALID_UNIGRAM_PROBABILITY; static const int UNIGRAM_PROBABILITY_STEP; static const int MAX_BIGRAM_PROBABILITY_DELTA; static const int MIN_VALID_BIGRAM_PROBABILITY_DELTA; static const int BIGRAM_PROBABILITY_DELTA_STEP; static const int MAX_ENCODED_PROBABILITY; static const int MIN_VALID_ENCODED_PROBABILITY; static const int ENCODED_PROBABILITY_STEP; static const float MIN_PROBABILITY_TO_DECAY; static int decodeUnigramProbability(const int encodedProbability); static int decodeBigramProbabilityDelta(const int encodedProbability); static int decodeBigramProbability(const int encodedProbability); static int getDecayedProbability(const int rawProbability); static int backoff(const int unigramProbability); }; } // namespace latinime #endif /* LATINIME_FORGETTING_CURVE_UTILS_H */