Loading java/res/values/config.xml +2 −2 Original line number Diff line number Diff line Loading @@ -74,9 +74,9 @@ <item></item> <!-- Modest : Suggestion whose normalized score is greater than this value will be subject to auto-correction. --> <item>0.22</item> <item>0.185</item> <!-- Aggressive --> <item>0.08</item> <item>0.067</item> <!-- Very Aggressive : Suggestion whose normalized score is greater than this value will be subject to auto-correction. --> <item>0</item> Loading native/jni/src/binary_format.h +23 −9 Original line number Diff line number Diff line Loading @@ -520,19 +520,33 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t* const root, const int a return 0; } // This should probably return a probability in log space. static inline int backoff(const int unigramFreq) { return unigramFreq; // For some reason, applying the backoff weight gives bad results in tests. To apply the // backoff weight, we divide the probability by 2, which in our storing format means // decreasing the score by 8. // TODO: figure out what's wrong with this. // return unigramFreq > 8 ? unigramFreq - 8 : (0 == unigramFreq ? 0 : 8); } // This returns a probability in log space. inline int BinaryFormat::getProbability(const int position, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, const int unigramFreq) { if (!bigramMap || !bigramFilter) return unigramFreq; if (!isInFilter(bigramFilter, position)) return unigramFreq; const std::map<int, int>::const_iterator bigramFreq = bigramMap->find(position); if (bigramFreq != bigramMap->end()) { // TODO: return the frequency in bigramFreq->second return unigramFreq; if (!bigramMap || !bigramFilter) return backoff(unigramFreq); if (!isInFilter(bigramFilter, position)) return backoff(unigramFreq); const std::map<int, int>::const_iterator bigramFreqIt = bigramMap->find(position); if (bigramFreqIt != bigramMap->end()) { const int bigramFreq = bigramFreqIt->second; // We divide the range [unigramFreq..255] in 16.5 steps - in other words, we want the // unigram frequency to be the median value of the 17th step from the top. A value of // 0 for the bigram frequency represents the middle of the 16th step from the top, // while a value of 15 represents the middle of the top step. // See makedict.BinaryDictInputOutput for details. const float stepSize = ((float)MAX_FREQ - unigramFreq) / (1.5f + MAX_BIGRAM_FREQ); return (int)(unigramFreq + bigramFreq * stepSize); } else { return unigramFreq; return backoff(unigramFreq); } // TODO: if the unigram frequency is used, compute the actual probability } } // namespace latinime Loading native/jni/src/defines.h +3 −2 Original line number Diff line number Diff line Loading @@ -207,6 +207,7 @@ static inline void prof_out(void) { #define NEUTRAL_SCORE_SQUARED_RADIUS 8.0f #define HALF_SCORE_SQUARED_RADIUS 32.0f #define MAX_FREQ 255 #define MAX_BIGRAM_FREQ 15 // This must be greater than or equal to MAX_WORD_LENGTH defined in BinaryDictionary.java // This is only used for the size of array. Not to be used in c functions. Loading @@ -225,8 +226,8 @@ static inline void prof_out(void) { #define MULTIPLE_WORDS_DEMOTION_RATE 80 #define MIN_INPUT_LENGTH_FOR_THREE_OR_MORE_WORDS_CORRECTION 6 #define TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD 0.39 #define START_TWO_WORDS_CORRECTION_THRESHOLD 0.22 #define TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD 0.35 #define START_TWO_WORDS_CORRECTION_THRESHOLD 0.185 #define MAX_DEPTH_MULTIPLIER 3 Loading Loading
java/res/values/config.xml +2 −2 Original line number Diff line number Diff line Loading @@ -74,9 +74,9 @@ <item></item> <!-- Modest : Suggestion whose normalized score is greater than this value will be subject to auto-correction. --> <item>0.22</item> <item>0.185</item> <!-- Aggressive --> <item>0.08</item> <item>0.067</item> <!-- Very Aggressive : Suggestion whose normalized score is greater than this value will be subject to auto-correction. --> <item>0</item> Loading
native/jni/src/binary_format.h +23 −9 Original line number Diff line number Diff line Loading @@ -520,19 +520,33 @@ inline int BinaryFormat::getWordAtAddress(const uint8_t* const root, const int a return 0; } // This should probably return a probability in log space. static inline int backoff(const int unigramFreq) { return unigramFreq; // For some reason, applying the backoff weight gives bad results in tests. To apply the // backoff weight, we divide the probability by 2, which in our storing format means // decreasing the score by 8. // TODO: figure out what's wrong with this. // return unigramFreq > 8 ? unigramFreq - 8 : (0 == unigramFreq ? 0 : 8); } // This returns a probability in log space. inline int BinaryFormat::getProbability(const int position, const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, const int unigramFreq) { if (!bigramMap || !bigramFilter) return unigramFreq; if (!isInFilter(bigramFilter, position)) return unigramFreq; const std::map<int, int>::const_iterator bigramFreq = bigramMap->find(position); if (bigramFreq != bigramMap->end()) { // TODO: return the frequency in bigramFreq->second return unigramFreq; if (!bigramMap || !bigramFilter) return backoff(unigramFreq); if (!isInFilter(bigramFilter, position)) return backoff(unigramFreq); const std::map<int, int>::const_iterator bigramFreqIt = bigramMap->find(position); if (bigramFreqIt != bigramMap->end()) { const int bigramFreq = bigramFreqIt->second; // We divide the range [unigramFreq..255] in 16.5 steps - in other words, we want the // unigram frequency to be the median value of the 17th step from the top. A value of // 0 for the bigram frequency represents the middle of the 16th step from the top, // while a value of 15 represents the middle of the top step. // See makedict.BinaryDictInputOutput for details. const float stepSize = ((float)MAX_FREQ - unigramFreq) / (1.5f + MAX_BIGRAM_FREQ); return (int)(unigramFreq + bigramFreq * stepSize); } else { return unigramFreq; return backoff(unigramFreq); } // TODO: if the unigram frequency is used, compute the actual probability } } // namespace latinime Loading
native/jni/src/defines.h +3 −2 Original line number Diff line number Diff line Loading @@ -207,6 +207,7 @@ static inline void prof_out(void) { #define NEUTRAL_SCORE_SQUARED_RADIUS 8.0f #define HALF_SCORE_SQUARED_RADIUS 32.0f #define MAX_FREQ 255 #define MAX_BIGRAM_FREQ 15 // This must be greater than or equal to MAX_WORD_LENGTH defined in BinaryDictionary.java // This is only used for the size of array. Not to be used in c functions. Loading @@ -225,8 +226,8 @@ static inline void prof_out(void) { #define MULTIPLE_WORDS_DEMOTION_RATE 80 #define MIN_INPUT_LENGTH_FOR_THREE_OR_MORE_WORDS_CORRECTION 6 #define TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD 0.39 #define START_TWO_WORDS_CORRECTION_THRESHOLD 0.22 #define TWO_WORDS_CORRECTION_WITH_OTHER_ERROR_THRESHOLD 0.35 #define START_TWO_WORDS_CORRECTION_THRESHOLD 0.185 #define MAX_DEPTH_MULTIPLIER 3 Loading