Loading java/src/com/android/inputmethod/latin/BinaryDictionary.java +4 −15 Original line number Diff line number Diff line Loading @@ -44,9 +44,9 @@ public final class BinaryDictionary extends Dictionary { private static final int MAX_WORD_LENGTH = Constants.DICTIONARY_MAX_WORD_LENGTH; // Must be equal to MAX_RESULTS in native/jni/src/defines.h private static final int MAX_RESULTS = 18; // Required space count for auto commit. // TODO: Remove this heuristic. private static final int SPACE_COUNT_FOR_AUTO_COMMIT = 3; // The cutoff returned by native for auto-commit confidence. // Must be equal to CONFIDENCE_FOR_AUTO_COMMIT in native/jni/src/defines.h private static final int CONFIDENCE_TO_AUTO_COMMIT = 1000000; @UsedForTesting public static final String UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT"; Loading Loading @@ -343,18 +343,7 @@ public final class BinaryDictionary extends Dictionary { @Override public boolean shouldAutoCommit(final SuggestedWordInfo candidate) { // TODO: actually use the confidence rather than use this completely broken heuristic final String word = candidate.mWord; final int length = word.length(); int remainingSpaces = SPACE_COUNT_FOR_AUTO_COMMIT; for (int i = 0; i < length; ++i) { // This is okay because no low-surrogate and no high-surrogate can ever match the // space character, so we don't need to take care of iterating on code points. if (Constants.CODE_SPACE == word.charAt(i)) { if (0 >= --remainingSpaces) return true; } } return false; return candidate.mAutoCommitFirstWordConfidence > CONFIDENCE_TO_AUTO_COMMIT; } @Override Loading native/jni/src/defines.h +11 −1 Original line number Diff line number Diff line Loading @@ -298,9 +298,19 @@ static inline void prof_out(void) { #define NOT_AN_INDEX (-1) #define NOT_A_PROBABILITY (-1) #define NOT_A_DICT_POS (S_INT_MIN) // A special value to mean the first word confidence makes no sense in this case, // e.g. this is not a multi-word suggestion. 
#define NOT_A_FIRST_WORD_CONFIDENCE (S_INT_MIN) #define NOT_A_FIRST_WORD_CONFIDENCE (S_INT_MAX) // How high the confidence needs to be for us to auto-commit. Arbitrary. // This needs to be the same as CONFIDENCE_TO_AUTO_COMMIT in BinaryDictionary.java #define CONFIDENCE_FOR_AUTO_COMMIT (1000000) // 80% of the full confidence #define DISTANCE_WEIGHT_FOR_AUTO_COMMIT (80 * CONFIDENCE_FOR_AUTO_COMMIT / 100) // 100% of the full confidence #define LENGTH_WEIGHT_FOR_AUTO_COMMIT (CONFIDENCE_FOR_AUTO_COMMIT) // 80% of the full confidence #define SPACE_COUNT_WEIGHT_FOR_AUTO_COMMIT (80 * CONFIDENCE_FOR_AUTO_COMMIT / 100) #define KEYCODE_SPACE ' ' #define KEYCODE_SINGLE_QUOTE '\'' Loading native/jni/src/suggest/core/dicnode/dic_node.h +10 −0 Original line number Diff line number Diff line Loading @@ -321,6 +321,16 @@ class DicNode { DUMP_WORD_AND_SCORE("OUTPUT"); } // "Total" in this context (and other methods in this class) means the whole suggestion. When // this represents a multi-word suggestion, the referenced PtNode (in mDicNodeState) is only // the one that corresponds to the last word of the suggestion, and all the previous words // are concatenated together in mPrevWord - which contains a space at the end. int getTotalNodeSpaceCount() const { if (isFirstWord()) return 0; return CharUtils::getSpaceCount(mDicNodeState.mDicNodeStatePrevWord.mPrevWord, mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength()); } int getSecondWordFirstInputIndex(const ProximityInfoState *const pInfoState) const { const int inputIndex = mDicNodeState.mDicNodeStatePrevWord.getSecondWordFirstInputIndex(); if (inputIndex == NOT_AN_INDEX) { Loading native/jni/src/suggest/core/suggest.cpp +54 −4 Original line number Diff line number Diff line Loading @@ -166,7 +166,11 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen // TODO: have partial commit work even with multiple pointers. 
const bool outputSecondWordFirstLetterInputIndex = traverseSession->isOnlyOnePointerUsed(0 /* pointerId */); outputAutoCommitFirstWordConfidence[0] = computeFirstWordConfidence(); if (terminalSize > 0) { // If we have no suggestions, don't write this outputAutoCommitFirstWordConfidence[0] = computeFirstWordConfidence(&terminals[0]); } // Output suggestion results here for (int terminalIndex = 0; terminalIndex < terminalSize && outputWordIndex < MAX_RESULTS; Loading Loading @@ -255,9 +259,55 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen return outputWordIndex; } int Suggest::computeFirstWordConfidence() const { // TODO: implement this. return NOT_A_FIRST_WORD_CONFIDENCE; int Suggest::computeFirstWordConfidence(const DicNode *const terminalDicNode) const { // Get the number of spaces in the first suggestion const int spaceCount = terminalDicNode->getTotalNodeSpaceCount(); // Get the number of characters in the first suggestion const int length = terminalDicNode->getTotalNodeCodePointCount(); // Get the distance for the first word of the suggestion const float distance = terminalDicNode->getNormalizedCompoundDistanceAfterFirstWord(); // Arbitrarily, we give a score whose useful values range from 0 to 1,000,000. // 1,000,000 will be the cutoff to auto-commit. It's fine if the number is under 0 or // above 1,000,000 : under 0 just means it's very bad to commit, and above 1,000,000 means // we are very confident. // Expected space count is 1 ~ 5 static const int MIN_EXPECTED_SPACE_COUNT = 1; static const int MAX_EXPECTED_SPACE_COUNT = 5; // Expected length is about 4 ~ 30 static const int MIN_EXPECTED_LENGTH = 4; static const int MAX_EXPECTED_LENGTH = 30; // Expected distance is about 0.2 ~ 2.0, but consider 0.0 ~ 2.0 static const float MIN_EXPECTED_DISTANCE = 0.0; static const float MAX_EXPECTED_DISTANCE = 2.0; // This is not strict: it's where most stuff will be falling, but it's still fine if it's // outside these values. 
We want to output a value that reflects all of these. Each factor // contributes a bit. // We need at least a space. if (spaceCount < 1) return NOT_A_FIRST_WORD_CONFIDENCE; // The smaller the edit distance, the higher the contribution. MIN_EXPECTED_DISTANCE means full // contribution, while MAX_EXPECTED_DISTANCE means 0 contribution according to the // weight of the distance. Clamp to avoid overflows. const float clampedDistance = distance < MIN_EXPECTED_DISTANCE ? MIN_EXPECTED_DISTANCE : distance > MAX_EXPECTED_DISTANCE ? MAX_EXPECTED_DISTANCE : distance; const int distanceContribution = DISTANCE_WEIGHT_FOR_AUTO_COMMIT * (MAX_EXPECTED_DISTANCE - clampedDistance) / (MAX_EXPECTED_DISTANCE - MIN_EXPECTED_DISTANCE); // The larger the suggestion length, the larger the contribution. MIN_EXPECTED_LENGTH is no // contribution, MAX_EXPECTED_LENGTH is full contribution according to the weight of the // length. Length is guaranteed to be between 1 and 48, so we don't need to clamp. const int lengthContribution = LENGTH_WEIGHT_FOR_AUTO_COMMIT * (length - MIN_EXPECTED_LENGTH) / (MAX_EXPECTED_LENGTH - MIN_EXPECTED_LENGTH); // The more spaces, the larger the contribution. MIN_EXPECTED_SPACE_COUNT space is no // contribution, MAX_EXPECTED_SPACE_COUNT spaces is full contribution according to the // weight of the space count. 
const int spaceContribution = SPACE_COUNT_WEIGHT_FOR_AUTO_COMMIT * (spaceCount - MIN_EXPECTED_SPACE_COUNT) / (MAX_EXPECTED_SPACE_COUNT - MIN_EXPECTED_SPACE_COUNT); return distanceContribution + lengthContribution + spaceContribution; } /** Loading native/jni/src/suggest/core/suggest.h +1 −1 Original line number Diff line number Diff line Loading @@ -58,7 +58,7 @@ class Suggest : public SuggestInterface { int outputSuggestions(DicTraverseSession *traverseSession, int *frequencies, int *outputCodePoints, int *outputIndicesToPartialCommit, int *outputTypes, int *outputAutoCommitFirstWordConfidence) const; int computeFirstWordConfidence() const; int computeFirstWordConfidence(const DicNode *const terminalDicNode) const; void initializeSearch(DicTraverseSession *traverseSession, int commitPoint) const; void expandCurrentDicNodes(DicTraverseSession *traverseSession) const; void processTerminalDicNode(DicTraverseSession *traverseSession, DicNode *dicNode) const; Loading Loading
java/src/com/android/inputmethod/latin/BinaryDictionary.java +4 −15 Original line number Diff line number Diff line Loading @@ -44,9 +44,9 @@ public final class BinaryDictionary extends Dictionary { private static final int MAX_WORD_LENGTH = Constants.DICTIONARY_MAX_WORD_LENGTH; // Must be equal to MAX_RESULTS in native/jni/src/defines.h private static final int MAX_RESULTS = 18; // Required space count for auto commit. // TODO: Remove this heuristic. private static final int SPACE_COUNT_FOR_AUTO_COMMIT = 3; // The cutoff returned by native for auto-commit confidence. // Must be equal to CONFIDENCE_FOR_AUTO_COMMIT in native/jni/src/defines.h private static final int CONFIDENCE_TO_AUTO_COMMIT = 1000000; @UsedForTesting public static final String UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT"; Loading Loading @@ -343,18 +343,7 @@ public final class BinaryDictionary extends Dictionary { @Override public boolean shouldAutoCommit(final SuggestedWordInfo candidate) { // TODO: actually use the confidence rather than use this completely broken heuristic final String word = candidate.mWord; final int length = word.length(); int remainingSpaces = SPACE_COUNT_FOR_AUTO_COMMIT; for (int i = 0; i < length; ++i) { // This is okay because no low-surrogate and no high-surrogate can ever match the // space character, so we don't need to take care of iterating on code points. if (Constants.CODE_SPACE == word.charAt(i)) { if (0 >= --remainingSpaces) return true; } } return false; return candidate.mAutoCommitFirstWordConfidence > CONFIDENCE_TO_AUTO_COMMIT; } @Override Loading
native/jni/src/defines.h +11 −1 Original line number Diff line number Diff line Loading @@ -298,9 +298,19 @@ static inline void prof_out(void) { #define NOT_AN_INDEX (-1) #define NOT_A_PROBABILITY (-1) #define NOT_A_DICT_POS (S_INT_MIN) // A special value to mean the first word confidence makes no sense in this case, // e.g. this is not a multi-word suggestion. #define NOT_A_FIRST_WORD_CONFIDENCE (S_INT_MIN) #define NOT_A_FIRST_WORD_CONFIDENCE (S_INT_MAX) // How high the confidence needs to be for us to auto-commit. Arbitrary. // This needs to be the same as CONFIDENCE_TO_AUTO_COMMIT in BinaryDictionary.java #define CONFIDENCE_FOR_AUTO_COMMIT (1000000) // 80% of the full confidence #define DISTANCE_WEIGHT_FOR_AUTO_COMMIT (80 * CONFIDENCE_FOR_AUTO_COMMIT / 100) // 100% of the full confidence #define LENGTH_WEIGHT_FOR_AUTO_COMMIT (CONFIDENCE_FOR_AUTO_COMMIT) // 80% of the full confidence #define SPACE_COUNT_WEIGHT_FOR_AUTO_COMMIT (80 * CONFIDENCE_FOR_AUTO_COMMIT / 100) #define KEYCODE_SPACE ' ' #define KEYCODE_SINGLE_QUOTE '\'' Loading
native/jni/src/suggest/core/dicnode/dic_node.h +10 −0 Original line number Diff line number Diff line Loading @@ -321,6 +321,16 @@ class DicNode { DUMP_WORD_AND_SCORE("OUTPUT"); } // "Total" in this context (and other methods in this class) means the whole suggestion. When // this represents a multi-word suggestion, the referenced PtNode (in mDicNodeState) is only // the one that corresponds to the last word of the suggestion, and all the previous words // are concatenated together in mPrevWord - which contains a space at the end. int getTotalNodeSpaceCount() const { if (isFirstWord()) return 0; return CharUtils::getSpaceCount(mDicNodeState.mDicNodeStatePrevWord.mPrevWord, mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength()); } int getSecondWordFirstInputIndex(const ProximityInfoState *const pInfoState) const { const int inputIndex = mDicNodeState.mDicNodeStatePrevWord.getSecondWordFirstInputIndex(); if (inputIndex == NOT_AN_INDEX) { Loading
native/jni/src/suggest/core/suggest.cpp +54 −4 Original line number Diff line number Diff line Loading @@ -166,7 +166,11 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen // TODO: have partial commit work even with multiple pointers. const bool outputSecondWordFirstLetterInputIndex = traverseSession->isOnlyOnePointerUsed(0 /* pointerId */); outputAutoCommitFirstWordConfidence[0] = computeFirstWordConfidence(); if (terminalSize > 0) { // If we have no suggestions, don't write this outputAutoCommitFirstWordConfidence[0] = computeFirstWordConfidence(&terminals[0]); } // Output suggestion results here for (int terminalIndex = 0; terminalIndex < terminalSize && outputWordIndex < MAX_RESULTS; Loading Loading @@ -255,9 +259,55 @@ int Suggest::outputSuggestions(DicTraverseSession *traverseSession, int *frequen return outputWordIndex; } int Suggest::computeFirstWordConfidence() const { // TODO: implement this. return NOT_A_FIRST_WORD_CONFIDENCE; int Suggest::computeFirstWordConfidence(const DicNode *const terminalDicNode) const { // Get the number of spaces in the first suggestion const int spaceCount = terminalDicNode->getTotalNodeSpaceCount(); // Get the number of characters in the first suggestion const int length = terminalDicNode->getTotalNodeCodePointCount(); // Get the distance for the first word of the suggestion const float distance = terminalDicNode->getNormalizedCompoundDistanceAfterFirstWord(); // Arbitrarily, we give a score whose useful values range from 0 to 1,000,000. // 1,000,000 will be the cutoff to auto-commit. It's fine if the number is under 0 or // above 1,000,000 : under 0 just means it's very bad to commit, and above 1,000,000 means // we are very confident. 
// Expected space count is 1 ~ 5 static const int MIN_EXPECTED_SPACE_COUNT = 1; static const int MAX_EXPECTED_SPACE_COUNT = 5; // Expected length is about 4 ~ 30 static const int MIN_EXPECTED_LENGTH = 4; static const int MAX_EXPECTED_LENGTH = 30; // Expected distance is about 0.2 ~ 2.0, but consider 0.0 ~ 2.0 static const float MIN_EXPECTED_DISTANCE = 0.0; static const float MAX_EXPECTED_DISTANCE = 2.0; // This is not strict: it's where most stuff will be falling, but it's still fine if it's // outside these values. We want to output a value that reflects all of these. Each factor // contributes a bit. // We need at least a space. if (spaceCount < 1) return NOT_A_FIRST_WORD_CONFIDENCE; // The smaller the edit distance, the higher the contribution. MIN_EXPECTED_DISTANCE means full // contribution, while MAX_EXPECTED_DISTANCE means 0 contribution according to the // weight of the distance. Clamp to avoid overflows. const float clampedDistance = distance < MIN_EXPECTED_DISTANCE ? MIN_EXPECTED_DISTANCE : distance > MAX_EXPECTED_DISTANCE ? MAX_EXPECTED_DISTANCE : distance; const int distanceContribution = DISTANCE_WEIGHT_FOR_AUTO_COMMIT * (MAX_EXPECTED_DISTANCE - clampedDistance) / (MAX_EXPECTED_DISTANCE - MIN_EXPECTED_DISTANCE); // The larger the suggestion length, the larger the contribution. MIN_EXPECTED_LENGTH is no // contribution, MAX_EXPECTED_LENGTH is full contribution according to the weight of the // length. Length is guaranteed to be between 1 and 48, so we don't need to clamp. const int lengthContribution = LENGTH_WEIGHT_FOR_AUTO_COMMIT * (length - MIN_EXPECTED_LENGTH) / (MAX_EXPECTED_LENGTH - MIN_EXPECTED_LENGTH); // The more spaces, the larger the contribution. MIN_EXPECTED_SPACE_COUNT space is no // contribution, MAX_EXPECTED_SPACE_COUNT spaces is full contribution according to the // weight of the space count. 
const int spaceContribution = SPACE_COUNT_WEIGHT_FOR_AUTO_COMMIT * (spaceCount - MIN_EXPECTED_SPACE_COUNT) / (MAX_EXPECTED_SPACE_COUNT - MIN_EXPECTED_SPACE_COUNT); return distanceContribution + lengthContribution + spaceContribution; } /** Loading
native/jni/src/suggest/core/suggest.h +1 −1 Original line number Diff line number Diff line Loading @@ -58,7 +58,7 @@ class Suggest : public SuggestInterface { int outputSuggestions(DicTraverseSession *traverseSession, int *frequencies, int *outputCodePoints, int *outputIndicesToPartialCommit, int *outputTypes, int *outputAutoCommitFirstWordConfidence) const; int computeFirstWordConfidence() const; int computeFirstWordConfidence(const DicNode *const terminalDicNode) const; void initializeSearch(DicTraverseSession *traverseSession, int commitPoint) const; void expandCurrentDicNodes(DicTraverseSession *traverseSession) const; void processTerminalDicNode(DicTraverseSession *traverseSession, DicNode *dicNode) const; Loading