Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 60021bbd authored by Keisuke Kuroyanagi's avatar Keisuke Kuroyanagi
Browse files

Enable Quadgram for personalized dicts.

Before:
Total words: 1134659, Success Num: 944709, Success Percentage: 83.259%
Bad Failures, with auto-correction (typed word == expected word, output word != expected word): 1258, Bad Failure Percentage: 0.111%
Failures, with auto-correction (F-C): 28013, F-C Percentage: 2.469%
Max Keystrokes: 6072844, Min Keystrokes: 3347332, Keystroke Saving Percentage:44.880%

After:
Total words: 1134665, Success Num: 945026, Success Percentage: 83.287%
Bad Failures, with auto-correction (typed word == expected word, output word != expected word): 1271, Bad Failure Percentage: 0.112%
Failures, with auto-correction (F-C): 27756, F-C Percentage: 2.446%
Max Keystrokes: 6072850, Min Keystrokes: 3290996, Keystroke Saving Percentage:45.808%

Change-Id: I16af52a3e9c371b95fd6f0741f45ee6b2443bea6
parent 78212a6d
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -179,7 +179,7 @@ public final class Constants {

    // (MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1)-gram is supported in Java side. Needs to modify
    // MAX_PREV_WORD_COUNT_FOR_N_GRAM in native/jni/src/defines.h for suggestions.
    public static final int MAX_PREV_WORD_COUNT_FOR_N_GRAM = 2;
    public static final int MAX_PREV_WORD_COUNT_FOR_N_GRAM = 3;

    // Key events coming any faster than this are long-presses.
    public static final int LONG_PRESS_MILLISECONDS = 200;
+1 −1
Original line number Diff line number Diff line
@@ -275,7 +275,7 @@ static inline void showStackTrace() {
#define MAX_POINTER_COUNT_G 2

// (MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1)-gram is supported.
#define MAX_PREV_WORD_COUNT_FOR_N_GRAM 2
#define MAX_PREV_WORD_COUNT_FOR_N_GRAM 3

#define DISALLOW_DEFAULT_CONSTRUCTOR(TypeName) \
  TypeName() = delete
+4 −3
Original line number Diff line number Diff line
@@ -31,10 +31,11 @@ const char *const HeaderPolicy::IS_DECAYING_DICT_KEY = "USES_FORGETTING_CURVE";
const char *const HeaderPolicy::DATE_KEY = "date";
const char *const HeaderPolicy::LAST_DECAYED_TIME_KEY = "LAST_DECAYED_TIME";
const char *const HeaderPolicy::NGRAM_COUNT_KEYS[] =
        {"UNIGRAM_COUNT", "BIGRAM_COUNT", "TRIGRAM_COUNT"};
        {"UNIGRAM_COUNT", "BIGRAM_COUNT", "TRIGRAM_COUNT", "QUADGRAM_COUNT"};
const char *const HeaderPolicy::MAX_NGRAM_COUNT_KEYS[] =
        {"MAX_UNIGRAM_ENTRY_COUNT", "MAX_BIGRAM_ENTRY_COUNT", "MAX_TRIGRAM_ENTRY_COUNT"};
const int HeaderPolicy::DEFAULT_MAX_NGRAM_COUNTS[] = {10000, 30000, 30000};
        {"MAX_UNIGRAM_ENTRY_COUNT", "MAX_BIGRAM_ENTRY_COUNT", "MAX_TRIGRAM_ENTRY_COUNT",
                "MAX_QUADGRAM_ENTRY_COUNT"};
const int HeaderPolicy::DEFAULT_MAX_NGRAM_COUNTS[] = {10000, 30000, 30000, 30000};
const char *const HeaderPolicy::EXTENDED_REGION_SIZE_KEY = "EXTENDED_REGION_SIZE";
// Historical info is information that is needed to support decaying such as timestamp, level and
// count.
+4 −3
Original line number Diff line number Diff line
@@ -19,12 +19,13 @@
namespace latinime {

// Used to provide stable probabilities even if the user's input count is small.
const int DynamicLanguageModelProbabilityUtils::ASSUMED_MIN_COUNTS[] = {8192, 2, 2};
const int DynamicLanguageModelProbabilityUtils::ASSUMED_MIN_COUNTS[] = {8192, 2, 2, 1};

// Encoded backoff weights.
// Note that we give positive value for trigrams that means the weight is more than 1.
// Note that we give positive values for trigrams and quadgrams that means the weight is more than
// 1.
// TODO: Apply backoff for main dictionaries and quit giving a positive backoff weight.
const int DynamicLanguageModelProbabilityUtils::ENCODED_BACKOFF_WEIGHTS[] = {-32, 0, 8};
const int DynamicLanguageModelProbabilityUtils::ENCODED_BACKOFF_WEIGHTS[] = {-32, -4, 2, 8};

// This value is used to remove too old entries from the dictionary.
const int DynamicLanguageModelProbabilityUtils::DURATION_TO_DISCARD_ENTRY_IN_SECONDS =
+1 −1
Original line number Diff line number Diff line
@@ -66,7 +66,7 @@ class DynamicLanguageModelProbabilityUtils {
private:
    DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicLanguageModelProbabilityUtils);

    static_assert(MAX_PREV_WORD_COUNT_FOR_N_GRAM <= 2, "Max supported Ngram is Trigram.");
    static_assert(MAX_PREV_WORD_COUNT_FOR_N_GRAM <= 3, "Max supported Ngram is Quadgram.");

    static const int ASSUMED_MIN_COUNTS[];
    static const int ENCODED_BACKOFF_WEIGHTS[];
Loading