Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit bc77adef authored by Jean Chalard's avatar Jean Chalard Committed by Android (Google) Code Review
Browse files

Merge "Return the bigram frequency if available." into jb-dev

parents 5748a7ce 9416c814
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -74,9 +74,9 @@
        <item></item>
        <!-- Modest : Suggestion whose normalized score is greater than this value
             will be subject to auto-correction. -->
        <item>0.22</item>
        <item>0.185</item>
        <!-- Aggressive -->
        <item>0.08</item>
        <item>0.067</item>
        <!-- Very Aggressive : Suggestion whose normalized score is greater than this value
             will be subject to auto-correction. -->
        <item>0</item>
+23 −9
Original line number Diff line number Diff line
@@ -520,19 +520,33 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t* const root, const int a
    return 0;
}

// This should probably return a probability in log space.
static inline int backoff(const int unigramFreq) {
    // No backoff weight is applied for now: the unigram frequency passes through unchanged.
    //
    // Applying the weight would mean dividing the probability by 2, which in our storing
    // format means decreasing the score by 8:
    //     return unigramFreq > 8 ? unigramFreq - 8 : (0 == unigramFreq ? 0 : 8);
    // That variant stays disabled because, for some reason, it gives bad results in tests.
    // TODO: figure out what's wrong with this.
    const int passthroughFreq = unigramFreq;
    return passthroughFreq;
}

// This returns a probability in log space.
//
// position:     key looked up in bigramFilter and in bigramMap.
// bigramMap:    maps a position to its bigram frequency; may be null.
// bigramFilter: filter checked with isInFilter() before searching the map; may be null.
// unigramFreq:  unigram frequency of the word, used as the base of the returned score.
//
// When either pointer is null, when position is not in the filter, or when the map holds no
// entry for position, the result is backoff(unigramFreq). Otherwise the unigram frequency is
// raised according to the bigram frequency found in the map.
inline int BinaryFormat::getProbability(const int position, const std::map<int, int> *bigramMap,
        const uint8_t *bigramFilter, const int unigramFreq) {
    if (!bigramMap || !bigramFilter) return backoff(unigramFreq);
    if (!isInFilter(bigramFilter, position)) return backoff(unigramFreq);
    const std::map<int, int>::const_iterator bigramFreqIt = bigramMap->find(position);
    if (bigramFreqIt != bigramMap->end()) {
        const int bigramFreq = bigramFreqIt->second;
        // We divide the range [unigramFreq..255] in 16.5 steps - in other words, we want the
        // unigram frequency to be the median value of the 17th step from the top. A value of
        // 0 for the bigram frequency represents the middle of the 16th step from the top,
        // while a value of 15 represents the middle of the top step.
        // See makedict.BinaryDictInputOutput for details.
        const float stepSize =
                (static_cast<float>(MAX_FREQ) - unigramFreq) / (1.5f + MAX_BIGRAM_FREQ);
        // The conversion to int truncates the fractional part of the interpolated score.
        return static_cast<int>(unigramFreq + bigramFreq * stepSize);
    } else {
        return backoff(unigramFreq);
    }
    // TODO: if the unigram frequency is used, compute the actual probability
}

} // namespace latinime
+3 −2
Original line number Diff line number Diff line
@@ -207,6 +207,7 @@ static inline void prof_out(void) {
#define NEUTRAL_SCORE_SQUARED_RADIUS 8.0f
#define HALF_SCORE_SQUARED_RADIUS 32.0f
// Top of the frequency range: BinaryFormat::getProbability() places bigram-adjusted scores
// within [unigramFreq..MAX_FREQ].
#define MAX_FREQ 255
// Bigram frequency value that BinaryFormat::getProbability() maps to the top step of that range.
// NOTE(review): presumably the largest bigram frequency the dictionary format can store - confirm
// against makedict.BinaryDictInputOutput.
#define MAX_BIGRAM_FREQ 15

// This must be greater than or equal to MAX_WORD_LENGTH defined in BinaryDictionary.java.
// This is only used for sizing arrays; it is not to be used in C functions.
@@ -225,8 +226,8 @@ static inline void prof_out(void) {
#define MULTIPLE_WORDS_DEMOTION_RATE 80
#define MIN_INPUT_LENGTH_FOR_THREE_OR_MORE_WORDS_CORRECTION 6

// Each threshold is defined exactly once; the superseded values (0.39 and 0.22) are gone so the
// macros are no longer redefined with conflicting replacement text.
// NOTE(review): 0.185 equals the "Modest" auto-correction threshold in the XML config touched by
// the same change - presumably the two are meant to stay in sync; confirm.
#define TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD 0.35
#define START_TWO_WORDS_CORRECTION_THRESHOLD 0.185

#define MAX_DEPTH_MULTIPLIER 3