Loading java/src/com/android/inputmethod/latin/AutoCorrection.java +3 −2 Original line number Diff line number Diff line Loading @@ -118,8 +118,9 @@ public class AutoCorrection { final int autoCorrectionSuggestionScore = sortedScores[0]; // TODO: when the normalized score of the first suggestion is nearly equals to // the normalized score of the second suggestion, behave less aggressive. mNormalizedScore = Utils.calcNormalizedScore( typedWord,autoCorrectionSuggestion, autoCorrectionSuggestionScore); mNormalizedScore = BinaryDictionary.calcNormalizedScore( typedWord.toString(), autoCorrectionSuggestion.toString(), autoCorrectionSuggestionScore); if (DBG) { Log.d(TAG, "Normalized " + typedWord + "," + autoCorrectionSuggestion + "," + autoCorrectionSuggestionScore + ", " + mNormalizedScore Loading java/src/com/android/inputmethod/latin/BinaryDictionary.java +14 −0 Original line number Diff line number Diff line Loading @@ -118,6 +118,10 @@ public class BinaryDictionary extends Dictionary { private native int getBigramsNative(long dict, char[] prevWord, int prevWordLength, int[] inputCodes, int inputCodesLength, char[] outputChars, int[] scores, int maxWordLength, int maxBigrams, int maxAlternatives); private static native double calcNormalizedScoreNative( char[] before, int beforeLength, char[] after, int afterLength, int score); private static native int editDistanceNative( char[] before, int beforeLength, char[] after, int afterLength); private final void loadDictionary(String path, long startOffset, long length) { mNativeDict = openNative(path, startOffset, length, Loading Loading @@ -211,6 +215,16 @@ public class BinaryDictionary extends Dictionary { mFlags, outputChars, scores); } public static double calcNormalizedScore(String before, String after, int score) { return calcNormalizedScoreNative(before.toCharArray(), before.length(), after.toCharArray(), after.length(), score); } public static int editDistance(String before, String after) { return editDistanceNative( before.toCharArray(), before.length(), after.toCharArray(), after.length()); } @Override public boolean isValidWord(CharSequence word) { if (word == null) return false; Loading java/src/com/android/inputmethod/latin/Utils.java +2 −93 Original line number Diff line number Diff line Loading @@ -191,7 +191,8 @@ public class Utils { final int typedWordLength = typedWord.length(); final int maxEditDistanceOfNativeDictionary = (typedWordLength < 5 ? 2 : typedWordLength / 2) + 1; final int distance = Utils.editDistance(typedWord, suggestionWord); final int distance = BinaryDictionary.editDistance( typedWord.toString(), suggestionWord.toString()); if (DBG) { Log.d(TAG, "Autocorrected edit distance = " + distance + ", " + maxEditDistanceOfNativeDictionary); Loading Loading @@ -323,49 +324,6 @@ public class Utils { } } /* Damerau-Levenshtein distance */ public static int editDistance(CharSequence s, CharSequence t) { if (s == null || t == null) { throw new IllegalArgumentException("editDistance: Arguments should not be null."); } final int sl = s.length(); final int tl = t.length(); int[][] dp = new int [sl + 1][tl + 1]; for (int i = 0; i <= sl; i++) { dp[i][0] = i; } for (int j = 0; j <= tl; j++) { dp[0][j] = j; } for (int i = 0; i < sl; ++i) { for (int j = 0; j < tl; ++j) { final char sc = Character.toLowerCase(s.charAt(i)); final char tc = Character.toLowerCase(t.charAt(j)); final int cost = sc == tc ? 0 : 1; dp[i + 1][j + 1] = Math.min( dp[i][j + 1] + 1, Math.min(dp[i + 1][j] + 1, dp[i][j] + cost)); // Overwrite for transposition cases if (i > 0 && j > 0 && sc == Character.toLowerCase(t.charAt(j - 1)) && tc == Character.toLowerCase(s.charAt(i - 1))) { dp[i + 1][j + 1] = Math.min(dp[i + 1][j + 1], dp[i - 1][j - 1] + cost); } } } if (DBG_EDIT_DISTANCE) { Log.d(TAG, "editDistance:" + s + "," + t); for (int i = 0; i < dp.length; ++i) { StringBuffer sb = new StringBuffer(); for (int j = 0; j < dp[i].length; ++j) { sb.append(dp[i][j]).append(','); } Log.d(TAG, i + ":" + sb.toString()); } } return dp[sl][tl]; } // Get the current stack trace public static String getStackTrace() { StringBuilder sb = new StringBuilder(); Loading @@ -379,55 +337,6 @@ public class Utils { return sb.toString(); } // In dictionary.cpp, getSuggestion() method, // suggestion scores are computed using the below formula. // original score // := pow(mTypedLetterMultiplier (this is defined 2), // (the number of matched characters between typed word and suggested word)) // * (individual word's score which defined in the unigram dictionary, // and this score is defined in range [0, 255].) // Then, the following processing is applied. // - If the dictionary word is matched up to the point of the user entry // (full match up to min(before.length(), after.length()) // => Then multiply by FULL_MATCHED_WORDS_PROMOTION_RATE (this is defined 1.2) // - If the word is a true full match except for differences in accents or // capitalization, then treat it as if the score was 255. // - If before.length() == after.length() // => multiply by mFullWordMultiplier (this is defined 2)) // So, maximum original score is pow(2, min(before.length(), after.length())) * 255 * 2 * 1.2 // For historical reasons we ignore the 1.2 modifier (because the measure for a good // autocorrection threshold was done at a time when it didn't exist). This doesn't change // the result. // So, we can normalize original score by dividing pow(2, min(b.l(),a.l())) * 255 * 2. private static final int MAX_INITIAL_SCORE = 255; private static final int TYPED_LETTER_MULTIPLIER = 2; private static final int FULL_WORD_MULTIPLIER = 2; private static final int S_INT_MAX = 2147483647; public static double calcNormalizedScore(CharSequence before, CharSequence after, int score) { final int beforeLength = before.length(); final int afterLength = after.length(); if (beforeLength == 0 || afterLength == 0) return 0; final int distance = editDistance(before, after); // If afterLength < beforeLength, the algorithm is suggesting a word by excessive character // correction. int spaceCount = 0; for (int i = 0; i < afterLength; ++i) { if (after.charAt(i) == Keyboard.CODE_SPACE) { ++spaceCount; } } if (spaceCount == afterLength) return 0; final double maximumScore = score == S_INT_MAX ? S_INT_MAX : MAX_INITIAL_SCORE * Math.pow( TYPED_LETTER_MULTIPLIER, Math.min(beforeLength, afterLength - spaceCount)) * FULL_WORD_MULTIPLIER; // add a weight based on edit distance. // distance <= max(afterLength, beforeLength) == afterLength, // so, 0 <= distance / afterLength <= 1 final double weight = 1.0 - (double) distance / afterLength; return (score / maximumScore) * weight; } public static class UsabilityStudyLogUtils { private static final String USABILITY_TAG = UsabilityStudyLogUtils.class.getSimpleName(); private static final String FILENAME = "log.txt"; Loading java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerService.java +5 −4 Original line number Diff line number Diff line Loading @@ -270,7 +270,7 @@ public class AndroidSpellCheckerService extends SpellCheckerService // make the threshold. final String wordString = new String(word, wordOffset, wordLength); final double normalizedScore = Utils.calcNormalizedScore(mOriginalText, wordString, score); BinaryDictionary.calcNormalizedScore(mOriginalText, wordString, score); if (normalizedScore < mSuggestionThreshold) { if (DBG) Log.i(TAG, wordString + " does not make the score threshold"); return true; Loading Loading @@ -303,8 +303,8 @@ public class AndroidSpellCheckerService extends SpellCheckerService hasRecommendedSuggestions = false; } else { gatheredSuggestions = EMPTY_STRING_ARRAY; final double normalizedScore = Utils.calcNormalizedScore(mOriginalText, mBestSuggestion, mBestScore); final double normalizedScore = BinaryDictionary.calcNormalizedScore( mOriginalText, mBestSuggestion, mBestScore); hasRecommendedSuggestions = (normalizedScore > mRecommendedThreshold); } } else { Loading Loading @@ -338,7 +338,8 @@ public class AndroidSpellCheckerService extends SpellCheckerService final int bestScore = mScores[mLength - 1]; final CharSequence bestSuggestion = mSuggestions.get(0); final double normalizedScore = Utils.calcNormalizedScore(mOriginalText, bestSuggestion, bestScore); BinaryDictionary.calcNormalizedScore( mOriginalText, bestSuggestion.toString(), bestScore); hasRecommendedSuggestions = (normalizedScore > mRecommendedThreshold); if (DBG) { Log.i(TAG, "Best suggestion : " + bestSuggestion + ", score " + bestScore); Loading native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp +28 −1 Original line number Diff line number Diff line Loading @@ -18,6 +18,7 @@ #define LOG_TAG "LatinIME: jni: BinaryDictionary" #include "binary_format.h" #include "correction.h" #include "com_android_inputmethod_latin_BinaryDictionary.h" #include "dictionary.h" #include "jni.h" Loading Loading @@ -188,6 +189,29 @@ static jboolean latinime_BinaryDictionary_isValidWord(JNIEnv *env, jobject objec return result; } static jdouble latinime_BinaryDictionary_calcNormalizedScore(JNIEnv *env, jobject object, jcharArray before, jint beforeLength, jcharArray after, jint afterLength, jint score) { jchar *beforeChars = env->GetCharArrayElements(before, 0); jchar *afterChars = env->GetCharArrayElements(after, 0); jdouble result = Correction::RankingAlgorithm::calcNormalizedScore( (unsigned short*)beforeChars, beforeLength, (unsigned short*)afterChars, afterLength, score); env->ReleaseCharArrayElements(before, beforeChars, JNI_ABORT); env->ReleaseCharArrayElements(after, afterChars, JNI_ABORT); return result; } static jint latinime_BinaryDictionary_editDistance(JNIEnv *env, jobject object, jcharArray before, jint beforeLength, jcharArray after, jint afterLength) { jchar *beforeChars = env->GetCharArrayElements(before, 0); jchar *afterChars = env->GetCharArrayElements(after, 0); jint result = Correction::RankingAlgorithm::editDistance( (unsigned short*)beforeChars, beforeLength, (unsigned short*)afterChars, afterLength); env->ReleaseCharArrayElements(before, beforeChars, JNI_ABORT); env->ReleaseCharArrayElements(after, afterChars, JNI_ABORT); return result; } static void latinime_BinaryDictionary_close(JNIEnv *env, jobject object, jlong dict) { Dictionary *dictionary = (Dictionary*)dict; if (!dictionary) return; Loading Loading @@ -222,7 +246,10 @@ static JNINativeMethod sMethods[] = { {"closeNative", "(J)V", (void*)latinime_BinaryDictionary_close}, {"getSuggestionsNative", "(JJ[I[I[III[C[I)I", (void*)latinime_BinaryDictionary_getSuggestions}, {"isValidWordNative", "(J[CI)Z", (void*)latinime_BinaryDictionary_isValidWord}, {"getBigramsNative", "(J[CI[II[C[IIII)I", (void*)latinime_BinaryDictionary_getBigrams} {"getBigramsNative", "(J[CI[II[C[IIII)I", (void*)latinime_BinaryDictionary_getBigrams}, {"calcNormalizedScoreNative", "([CI[CII)D", (void*)latinime_BinaryDictionary_calcNormalizedScore}, {"editDistanceNative", "([CI[CI)I", (void*)latinime_BinaryDictionary_editDistance} }; int register_BinaryDictionary(JNIEnv *env) { Loading Loading
java/src/com/android/inputmethod/latin/AutoCorrection.java +3 −2 Original line number Diff line number Diff line Loading @@ -118,8 +118,9 @@ public class AutoCorrection { final int autoCorrectionSuggestionScore = sortedScores[0]; // TODO: when the normalized score of the first suggestion is nearly equals to // the normalized score of the second suggestion, behave less aggressive. mNormalizedScore = Utils.calcNormalizedScore( typedWord,autoCorrectionSuggestion, autoCorrectionSuggestionScore); mNormalizedScore = BinaryDictionary.calcNormalizedScore( typedWord.toString(), autoCorrectionSuggestion.toString(), autoCorrectionSuggestionScore); if (DBG) { Log.d(TAG, "Normalized " + typedWord + "," + autoCorrectionSuggestion + "," + autoCorrectionSuggestionScore + ", " + mNormalizedScore Loading
java/src/com/android/inputmethod/latin/BinaryDictionary.java +14 −0 Original line number Diff line number Diff line Loading @@ -118,6 +118,10 @@ public class BinaryDictionary extends Dictionary { private native int getBigramsNative(long dict, char[] prevWord, int prevWordLength, int[] inputCodes, int inputCodesLength, char[] outputChars, int[] scores, int maxWordLength, int maxBigrams, int maxAlternatives); private static native double calcNormalizedScoreNative( char[] before, int beforeLength, char[] after, int afterLength, int score); private static native int editDistanceNative( char[] before, int beforeLength, char[] after, int afterLength); private final void loadDictionary(String path, long startOffset, long length) { mNativeDict = openNative(path, startOffset, length, Loading Loading @@ -211,6 +215,16 @@ public class BinaryDictionary extends Dictionary { mFlags, outputChars, scores); } public static double calcNormalizedScore(String before, String after, int score) { return calcNormalizedScoreNative(before.toCharArray(), before.length(), after.toCharArray(), after.length(), score); } public static int editDistance(String before, String after) { return editDistanceNative( before.toCharArray(), before.length(), after.toCharArray(), after.length()); } @Override public boolean isValidWord(CharSequence word) { if (word == null) return false; Loading
java/src/com/android/inputmethod/latin/Utils.java +2 −93 Original line number Diff line number Diff line Loading @@ -191,7 +191,8 @@ public class Utils { final int typedWordLength = typedWord.length(); final int maxEditDistanceOfNativeDictionary = (typedWordLength < 5 ? 2 : typedWordLength / 2) + 1; final int distance = Utils.editDistance(typedWord, suggestionWord); final int distance = BinaryDictionary.editDistance( typedWord.toString(), suggestionWord.toString()); if (DBG) { Log.d(TAG, "Autocorrected edit distance = " + distance + ", " + maxEditDistanceOfNativeDictionary); Loading Loading @@ -323,49 +324,6 @@ public class Utils { } } /* Damerau-Levenshtein distance */ public static int editDistance(CharSequence s, CharSequence t) { if (s == null || t == null) { throw new IllegalArgumentException("editDistance: Arguments should not be null."); } final int sl = s.length(); final int tl = t.length(); int[][] dp = new int [sl + 1][tl + 1]; for (int i = 0; i <= sl; i++) { dp[i][0] = i; } for (int j = 0; j <= tl; j++) { dp[0][j] = j; } for (int i = 0; i < sl; ++i) { for (int j = 0; j < tl; ++j) { final char sc = Character.toLowerCase(s.charAt(i)); final char tc = Character.toLowerCase(t.charAt(j)); final int cost = sc == tc ? 0 : 1; dp[i + 1][j + 1] = Math.min( dp[i][j + 1] + 1, Math.min(dp[i + 1][j] + 1, dp[i][j] + cost)); // Overwrite for transposition cases if (i > 0 && j > 0 && sc == Character.toLowerCase(t.charAt(j - 1)) && tc == Character.toLowerCase(s.charAt(i - 1))) { dp[i + 1][j + 1] = Math.min(dp[i + 1][j + 1], dp[i - 1][j - 1] + cost); } } } if (DBG_EDIT_DISTANCE) { Log.d(TAG, "editDistance:" + s + "," + t); for (int i = 0; i < dp.length; ++i) { StringBuffer sb = new StringBuffer(); for (int j = 0; j < dp[i].length; ++j) { sb.append(dp[i][j]).append(','); } Log.d(TAG, i + ":" + sb.toString()); } } return dp[sl][tl]; } // Get the current stack trace public static String getStackTrace() { StringBuilder sb = new StringBuilder(); Loading @@ -379,55 +337,6 @@ public class Utils { return sb.toString(); } // In dictionary.cpp, getSuggestion() method, // suggestion scores are computed using the below formula. // original score // := pow(mTypedLetterMultiplier (this is defined 2), // (the number of matched characters between typed word and suggested word)) // * (individual word's score which defined in the unigram dictionary, // and this score is defined in range [0, 255].) // Then, the following processing is applied. // - If the dictionary word is matched up to the point of the user entry // (full match up to min(before.length(), after.length()) // => Then multiply by FULL_MATCHED_WORDS_PROMOTION_RATE (this is defined 1.2) // - If the word is a true full match except for differences in accents or // capitalization, then treat it as if the score was 255. // - If before.length() == after.length() // => multiply by mFullWordMultiplier (this is defined 2)) // So, maximum original score is pow(2, min(before.length(), after.length())) * 255 * 2 * 1.2 // For historical reasons we ignore the 1.2 modifier (because the measure for a good // autocorrection threshold was done at a time when it didn't exist). This doesn't change // the result. // So, we can normalize original score by dividing pow(2, min(b.l(),a.l())) * 255 * 2. private static final int MAX_INITIAL_SCORE = 255; private static final int TYPED_LETTER_MULTIPLIER = 2; private static final int FULL_WORD_MULTIPLIER = 2; private static final int S_INT_MAX = 2147483647; public static double calcNormalizedScore(CharSequence before, CharSequence after, int score) { final int beforeLength = before.length(); final int afterLength = after.length(); if (beforeLength == 0 || afterLength == 0) return 0; final int distance = editDistance(before, after); // If afterLength < beforeLength, the algorithm is suggesting a word by excessive character // correction. int spaceCount = 0; for (int i = 0; i < afterLength; ++i) { if (after.charAt(i) == Keyboard.CODE_SPACE) { ++spaceCount; } } if (spaceCount == afterLength) return 0; final double maximumScore = score == S_INT_MAX ? S_INT_MAX : MAX_INITIAL_SCORE * Math.pow( TYPED_LETTER_MULTIPLIER, Math.min(beforeLength, afterLength - spaceCount)) * FULL_WORD_MULTIPLIER; // add a weight based on edit distance. // distance <= max(afterLength, beforeLength) == afterLength, // so, 0 <= distance / afterLength <= 1 final double weight = 1.0 - (double) distance / afterLength; return (score / maximumScore) * weight; } public static class UsabilityStudyLogUtils { private static final String USABILITY_TAG = UsabilityStudyLogUtils.class.getSimpleName(); private static final String FILENAME = "log.txt"; Loading
java/src/com/android/inputmethod/latin/spellcheck/AndroidSpellCheckerService.java +5 −4 Original line number Diff line number Diff line Loading @@ -270,7 +270,7 @@ public class AndroidSpellCheckerService extends SpellCheckerService // make the threshold. final String wordString = new String(word, wordOffset, wordLength); final double normalizedScore = Utils.calcNormalizedScore(mOriginalText, wordString, score); BinaryDictionary.calcNormalizedScore(mOriginalText, wordString, score); if (normalizedScore < mSuggestionThreshold) { if (DBG) Log.i(TAG, wordString + " does not make the score threshold"); return true; Loading Loading @@ -303,8 +303,8 @@ public class AndroidSpellCheckerService extends SpellCheckerService hasRecommendedSuggestions = false; } else { gatheredSuggestions = EMPTY_STRING_ARRAY; final double normalizedScore = Utils.calcNormalizedScore(mOriginalText, mBestSuggestion, mBestScore); final double normalizedScore = BinaryDictionary.calcNormalizedScore( mOriginalText, mBestSuggestion, mBestScore); hasRecommendedSuggestions = (normalizedScore > mRecommendedThreshold); } } else { Loading Loading @@ -338,7 +338,8 @@ public class AndroidSpellCheckerService extends SpellCheckerService final int bestScore = mScores[mLength - 1]; final CharSequence bestSuggestion = mSuggestions.get(0); final double normalizedScore = Utils.calcNormalizedScore(mOriginalText, bestSuggestion, bestScore); BinaryDictionary.calcNormalizedScore( mOriginalText, bestSuggestion.toString(), bestScore); hasRecommendedSuggestions = (normalizedScore > mRecommendedThreshold); if (DBG) { Log.i(TAG, "Best suggestion : " + bestSuggestion + ", score " + bestScore); Loading
native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp +28 −1 Original line number Diff line number Diff line Loading @@ -18,6 +18,7 @@ #define LOG_TAG "LatinIME: jni: BinaryDictionary" #include "binary_format.h" #include "correction.h" #include "com_android_inputmethod_latin_BinaryDictionary.h" #include "dictionary.h" #include "jni.h" Loading Loading @@ -188,6 +189,29 @@ static jboolean latinime_BinaryDictionary_isValidWord(JNIEnv *env, jobject objec return result; } static jdouble latinime_BinaryDictionary_calcNormalizedScore(JNIEnv *env, jobject object, jcharArray before, jint beforeLength, jcharArray after, jint afterLength, jint score) { jchar *beforeChars = env->GetCharArrayElements(before, 0); jchar *afterChars = env->GetCharArrayElements(after, 0); jdouble result = Correction::RankingAlgorithm::calcNormalizedScore( (unsigned short*)beforeChars, beforeLength, (unsigned short*)afterChars, afterLength, score); env->ReleaseCharArrayElements(before, beforeChars, JNI_ABORT); env->ReleaseCharArrayElements(after, afterChars, JNI_ABORT); return result; } static jint latinime_BinaryDictionary_editDistance(JNIEnv *env, jobject object, jcharArray before, jint beforeLength, jcharArray after, jint afterLength) { jchar *beforeChars = env->GetCharArrayElements(before, 0); jchar *afterChars = env->GetCharArrayElements(after, 0); jint result = Correction::RankingAlgorithm::editDistance( (unsigned short*)beforeChars, beforeLength, (unsigned short*)afterChars, afterLength); env->ReleaseCharArrayElements(before, beforeChars, JNI_ABORT); env->ReleaseCharArrayElements(after, afterChars, JNI_ABORT); return result; } static void latinime_BinaryDictionary_close(JNIEnv *env, jobject object, jlong dict) { Dictionary *dictionary = (Dictionary*)dict; if (!dictionary) return; Loading Loading @@ -222,7 +246,10 @@ static JNINativeMethod sMethods[] = { {"closeNative", "(J)V", (void*)latinime_BinaryDictionary_close}, {"getSuggestionsNative", "(JJ[I[I[III[C[I)I", (void*)latinime_BinaryDictionary_getSuggestions}, {"isValidWordNative", "(J[CI)Z", (void*)latinime_BinaryDictionary_isValidWord}, {"getBigramsNative", "(J[CI[II[C[IIII)I", (void*)latinime_BinaryDictionary_getBigrams} {"getBigramsNative", "(J[CI[II[C[IIII)I", (void*)latinime_BinaryDictionary_getBigrams}, {"calcNormalizedScoreNative", "([CI[CII)D", (void*)latinime_BinaryDictionary_calcNormalizedScore}, {"editDistanceNative", "([CI[CI)I", (void*)latinime_BinaryDictionary_editDistance} }; int register_BinaryDictionary(JNIEnv *env) { Loading